Bug Summary

File: Externals/SOIL/stb_image_aug.c
Location: line 3264, column 5
Description: Value stored to 'count' is never read

Annotated Source Code

1/* stbi-1.18 - public domain JPEG/PNG reader - http://nothings.org/stb_image.c
2 when you control the images you're loading
3
4 QUICK NOTES:
5 Primarily of interest to game developers and other people who can
6 avoid problematic images and only need the trivial interface
7
8 JPEG baseline (no JPEG progressive, no oddball channel decimations)
9 PNG 8-bit only
10 BMP non-1bpp, non-RLE
11 TGA (not sure what subset, if a subset)
12 PSD (composited view only, no extra channels)
13 HDR (radiance rgbE format)
14 writes BMP,TGA (define STBI_NO_WRITE to remove code)
15 decoded from memory or through stdio FILE (define STBI_NO_STDIO to remove code)
16 supports installable dequantizing-IDCT, YCbCr-to-RGB conversion (define STBI_SIMD)
17
18 TODO:
19 stbi_info_*
20
21 history:
22 1.18 fix a threading bug (local mutable static)
23 1.17 support interlaced PNG
24 1.16 major bugfix - convert_format converted one too many pixels
25 1.15 initialize some fields for thread safety
26 1.14 fix threadsafe conversion bug; header-file-only version (#define STBI_HEADER_FILE_ONLY before including)
27 1.13 threadsafe
28 1.12 const qualifiers in the API
29 1.11 Support installable IDCT, colorspace conversion routines
30 1.10 Fixes for 64-bit (don't use "unsigned long")
31 optimized upsampling by Fabian "ryg" Giesen
32 1.09 Fix format-conversion for PSD code (bad global variables!)
33 1.08 Thatcher Ulrich's PSD code integrated by Nicolas Schulz
34 1.07 attempt to fix C++ warning/errors again
35 1.06 attempt to fix C++ warning/errors again
36 1.05 fix TGA loading to return correct *comp and use good luminance calc
37 1.04 default float alpha is 1, not 255; use 'void *' for stbi_image_free
38 1.03 bugfixes to STBI_NO_STDIO, STBI_NO_HDR
39 1.02 support for (subset of) HDR files, float interface for preferred access to them
40 1.01 fix bug: possible bug in handling right-side up bmps... not sure
41 fix bug: the stbi_bmp_load() and stbi_tga_load() functions didn't work at all
42 1.00 interface to zlib that skips zlib header
43 0.99 correct handling of alpha in palette
44 0.98 TGA loader by lonesock; dynamically add loaders (untested)
45 0.97 jpeg errors on too large a file; also catch another malloc failure
46 0.96 fix detection of invalid v value - particleman@mollyrocket forum
47 0.95 during header scan, seek to markers in case of padding
48 0.94 STBI_NO_STDIO to disable stdio usage; rename all #defines the same
49 0.93 handle jpegtran output; verbose errors
50 0.92 read 4,8,16,24,32-bit BMP files of several formats
51 0.91 output 24-bit Windows 3.0 BMP files
52 0.90 fix a few more warnings; bump version number to approach 1.0
53 0.61 bugfixes due to Marc LeBlanc, Christopher Lloyd
54 0.60 fix compiling as c++
55 0.59 fix warnings: merge Dave Moore's -Wall fixes
56 0.58 fix bug: zlib uncompressed mode len/nlen was wrong endian
57 0.57 fix bug: jpg last huffman symbol before marker was >9 bits but less
58 than 16 available
59 0.56 fix bug: zlib uncompressed mode len vs. nlen
60 0.55 fix bug: restart_interval not initialized to 0
61 0.54 allow NULL for 'int *comp'
62 0.53 fix bug in png 3->4; speedup png decoding
63 0.52 png handles req_comp=3,4 directly; minor cleanup; jpeg comments
64 0.51 obey req_comp requests, 1-component jpegs return as 1-component,
65 on 'test' only check type, not whether we support this variant
66*/
67
68#include "stb_image_aug.h"
69
70#ifndef STBI_NO_HDR
71#include <math.h> // ldexp
72#include <string.h> // strcmp
73#endif
74
75#ifndef STBI_NO_STDIO
76#include <stdio.h>
77#endif
78#include <stdlib.h>
79#include <memory.h>
80#include <assert.h>
81#include <stdarg.h>
82
83#ifndef _MSC_VER
84 #ifdef __cplusplus
85 #define __forceinline inline
86 #else
87 #define __forceinline
88 #endif
89#endif
90
91
92// implementation:
93typedef unsigned char uint8;
94typedef unsigned short uint16;
95typedef signed short int16;
96typedef unsigned int uint32;
97typedef signed int int32;
98typedef unsigned int uint;
99
100// should produce compiler error if size is wrong
101typedef unsigned char validate_uint32[sizeof(uint32)==4];
102
103#if defined(STBI_NO_STDIO) && !defined(STBI_NO_WRITE)
104#define STBI_NO_WRITE
105#endif
106
107#ifndef STBI_NO_DDS
108#include "stbi_DDS_aug.h"
109#endif
110
111// I (JLD) want full messages for SOIL
112#define STBI_FAILURE_USERMSG1 1
113
114//////////////////////////////////////////////////////////////////////////////
115//
116// Generic API that works on all image types
117//
118
119// this is not threadsafe
120static const char *failure_reason;
121
122const char *stbi_failure_reason(void)
123{
124 return failure_reason;
125}
126
127static int e(const char *str)
128{
129 failure_reason = str;
130 return 0;
131}
132
133#ifdef STBI_NO_FAILURE_STRINGS
134 #define e(x,y)e(y) 0
135#elif defined(STBI_FAILURE_USERMSG1)
136 #define e(x,y)e(y) e(y)
137#else
138 #define e(x,y)e(y) e(x)
139#endif
140
141#define epf(x,y)((float *) (e(y)?((void*)0):((void*)0))) ((float *) (e(x,y)e(y)?NULL((void*)0):NULL((void*)0)))
142#define epuc(x,y)((unsigned char *) (e(y)?((void*)0):((void*)0))) ((unsigned char *) (e(x,y)e(y)?NULL((void*)0):NULL((void*)0)))
143
144void stbi_image_free(void *retval_from_stbi_load)
145{
146 free(retval_from_stbi_load);
147}
148
149#define MAX_LOADERS32 32
150stbi_loader *loaders[MAX_LOADERS32];
151static int max_loaders = 0;
152
153int stbi_register_loader(stbi_loader *loader)
154{
155 int i;
156 for (i=0; i < MAX_LOADERS32; ++i) {
157 // already present?
158 if (loaders[i] == loader)
159 return 1;
160 // end of the list?
161 if (loaders[i] == NULL((void*)0)) {
162 loaders[i] = loader;
163 max_loaders = i+1;
164 return 1;
165 }
166 }
167 // no room for it
168 return 0;
169}
170
171#ifndef STBI_NO_HDR
172static float *ldr_to_hdr(stbi_uc *data, int x, int y, int comp);
173static stbi_uc *hdr_to_ldr(float *data, int x, int y, int comp);
174#endif
175
176#ifndef STBI_NO_STDIO
177unsigned char *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp)
178{
179 FILE *f = fopen(filename, "rb");
180 unsigned char *result;
181 if (!f) return epuc("can't fopen", "Unable to open file")((unsigned char *) (e("Unable to open file")?((void*)0):((void
*)0)))
;
182 result = stbi_load_from_file(f,x,y,comp,req_comp);
183 fclose(f);
184 return result;
185}
186
187unsigned char *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
188{
189 int i;
190 if (stbi_jpeg_test_file(f))
191 return stbi_jpeg_load_from_file(f,x,y,comp,req_comp);
192 if (stbi_png_test_file(f))
193 return stbi_png_load_from_file(f,x,y,comp,req_comp);
194 if (stbi_bmp_test_file(f))
195 return stbi_bmp_load_from_file(f,x,y,comp,req_comp);
196 if (stbi_psd_test_file(f))
197 return stbi_psd_load_from_file(f,x,y,comp,req_comp);
198 #ifndef STBI_NO_DDS
199 if (stbi_dds_test_file(f))
200 return stbi_dds_load_from_file(f,x,y,comp,req_comp);
201 #endif
202 #ifndef STBI_NO_HDR
203 if (stbi_hdr_test_file(f)) {
204 float *hdr = stbi_hdr_load_from_file(f, x,y,comp,req_comp);
205 return hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
206 }
207 #endif
208 for (i=0; i < max_loaders; ++i)
209 if (loaders[i]->test_file(f))
210 return loaders[i]->load_from_file(f,x,y,comp,req_comp);
211 // test tga last because it's a crappy test!
212 if (stbi_tga_test_file(f))
213 return stbi_tga_load_from_file(f,x,y,comp,req_comp);
214 return epuc("unknown image type", "Image not of any known type, or corrupt")((unsigned char *) (e("Image not of any known type, or corrupt"
)?((void*)0):((void*)0)))
;
215}
216#endif
217
218unsigned char *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
219{
220 int i;
221 if (stbi_jpeg_test_memory(buffer,len))
222 return stbi_jpeg_load_from_memory(buffer,len,x,y,comp,req_comp);
223 if (stbi_png_test_memory(buffer,len))
224 return stbi_png_load_from_memory(buffer,len,x,y,comp,req_comp);
225 if (stbi_bmp_test_memory(buffer,len))
226 return stbi_bmp_load_from_memory(buffer,len,x,y,comp,req_comp);
227 if (stbi_psd_test_memory(buffer,len))
228 return stbi_psd_load_from_memory(buffer,len,x,y,comp,req_comp);
229 #ifndef STBI_NO_DDS
230 if (stbi_dds_test_memory(buffer,len))
231 return stbi_dds_load_from_memory(buffer,len,x,y,comp,req_comp);
232 #endif
233 #ifndef STBI_NO_HDR
234 if (stbi_hdr_test_memory(buffer, len)) {
235 float *hdr = stbi_hdr_load_from_memory(buffer, len,x,y,comp,req_comp);
236 return hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
237 }
238 #endif
239 for (i=0; i < max_loaders; ++i)
240 if (loaders[i]->test_memory(buffer,len))
241 return loaders[i]->load_from_memory(buffer,len,x,y,comp,req_comp);
242 // test tga last because it's a crappy test!
243 if (stbi_tga_test_memory(buffer,len))
244 return stbi_tga_load_from_memory(buffer,len,x,y,comp,req_comp);
245 return epuc("unknown image type", "Image not of any known type, or corrupt")((unsigned char *) (e("Image not of any known type, or corrupt"
)?((void*)0):((void*)0)))
;
246}
247
248#ifndef STBI_NO_HDR
249
250#ifndef STBI_NO_STDIO
251float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp)
252{
253 FILE *f = fopen(filename, "rb");
254 float *result;
255 if (!f) return epf("can't fopen", "Unable to open file")((float *) (e("Unable to open file")?((void*)0):((void*)0)));
256 result = stbi_loadf_from_file(f,x,y,comp,req_comp);
257 fclose(f);
258 return result;
259}
260
261float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
262{
263 unsigned char *data;
264 #ifndef STBI_NO_HDR
265 if (stbi_hdr_test_file(f))
266 return stbi_hdr_load_from_file(f,x,y,comp,req_comp);
267 #endif
268 data = stbi_load_from_file(f, x, y, comp, req_comp);
269 if (data)
270 return ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp);
271 return epf("unknown image type", "Image not of any known type, or corrupt")((float *) (e("Image not of any known type, or corrupt")?((void
*)0):((void*)0)))
;
272}
273#endif
274
275float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
276{
277 stbi_uc *data;
278 #ifndef STBI_NO_HDR
279 if (stbi_hdr_test_memory(buffer, len))
280 return stbi_hdr_load_from_memory(buffer, len,x,y,comp,req_comp);
281 #endif
282 data = stbi_load_from_memory(buffer, len, x, y, comp, req_comp);
283 if (data)
284 return ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp);
285 return epf("unknown image type", "Image not of any known type, or corrupt")((float *) (e("Image not of any known type, or corrupt")?((void
*)0):((void*)0)))
;
286}
287#endif
288
289// these is-hdr-or-not functions are defined regardless of whether STBI_NO_HDR
290// is defined, for API simplicity; if STBI_NO_HDR is defined, they always
291// report false!
292
293int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)
294{
295 #ifndef STBI_NO_HDR
296 return stbi_hdr_test_memory(buffer, len);
297 #else
298 return 0;
299 #endif
300}
301
302#ifndef STBI_NO_STDIO
303extern int stbi_is_hdr (char const *filename)
304{
305 FILE *f = fopen(filename, "rb");
306 int result=0;
307 if (f) {
308 result = stbi_is_hdr_from_file(f);
309 fclose(f);
310 }
311 return result;
312}
313
314extern int stbi_is_hdr_from_file(FILE *f)
315{
316 #ifndef STBI_NO_HDR
317 return stbi_hdr_test_file(f);
318 #else
319 return 0;
320 #endif
321}
322
323#endif
324
325// @TODO: get image dimensions & components without fully decoding
326#ifndef STBI_NO_STDIO
327extern int stbi_info (char const *filename, int *x, int *y, int *comp);
328extern int stbi_info_from_file (FILE *f, int *x, int *y, int *comp);
329#endif
330extern int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
331
332#ifndef STBI_NO_HDR
333static float h2l_gamma_i=1.0f/2.2f, h2l_scale_i=1.0f;
334static float l2h_gamma=2.2f, l2h_scale=1.0f;
335
336void stbi_hdr_to_ldr_gamma(float gammafactor) { h2l_gamma_i = 1/gammafactor; }
337void stbi_hdr_to_ldr_scale(float scale) { h2l_scale_i = 1/scale; }
338
339void stbi_ldr_to_hdr_gamma(float gammafactor) { l2h_gamma = gammafactor; }
340void stbi_ldr_to_hdr_scale(float scale) { l2h_scale = scale; }
341#endif
342
343
344//////////////////////////////////////////////////////////////////////////////
345//
346// Common code used by all image loaders
347//
348
349enum
350{
351 SCAN_load=0,
352 SCAN_type,
353 SCAN_header,
354};
355
356typedef struct
357{
358 uint32 img_x, img_y;
359 int img_n, img_out_n;
360
361 #ifndef STBI_NO_STDIO
362 FILE *img_file;
363 #endif
364 uint8 *img_buffer, *img_buffer_end;
365} stbi;
366
367#ifndef STBI_NO_STDIO
368static void start_file(stbi *s, FILE *f)
369{
370 s->img_file = f;
371 s->img_buffer = NULL((void*)0);
372 s->img_buffer_end = NULL((void*)0);
373}
374#endif
375
376static void start_mem(stbi *s, uint8 const *buffer, int len)
377{
378#ifndef STBI_NO_STDIO
379 s->img_file = NULL((void*)0);
380#endif
381 s->img_buffer = (uint8 *) buffer;
382 s->img_buffer_end = (uint8 *) buffer+len;
383}
384
385__forceinline static int get8(stbi *s)
386{
387#ifndef STBI_NO_STDIO
388 if (s->img_file) {
389 int c = fgetc(s->img_file);
390 return c == EOF(-1) ? 0 : c;
391 }
392#endif
393 if (s->img_buffer < s->img_buffer_end)
394 return *s->img_buffer++;
395 return 0;
396}
397
398__forceinline static int at_eof(stbi *s)
399{
400#ifndef STBI_NO_STDIO
401 if (s->img_file)
402 return feof(s->img_file);
403#endif
404 return s->img_buffer >= s->img_buffer_end;
405}
406
407__forceinline static uint8 get8u(stbi *s)
408{
409 return (uint8) get8(s);
410}
411
412static void skip(stbi *s, int n)
413{
414#ifndef STBI_NO_STDIO
415 if (s->img_file)
416 fseek(s->img_file, n, SEEK_CUR1);
417 else
418#endif
419 s->img_buffer += n;
420}
421
422static uint16 get16(stbi *s)
423{
424 int z = get8(s);
425 return (z << 8) + get8(s);
426}
427
428static uint32 get32(stbi *s)
429{
430 uint32 z = get16(s);
431 return (z << 16) + get16(s);
432}
433
434static uint16 get16le(stbi *s)
435{
436 int z = get8(s);
437 return z + (get8(s) << 8);
438}
439
440static uint32 get32le(stbi *s)
441{
442 uint32 z = get16le(s);
443 return z + (get16le(s) << 16);
444}
445
446static void getn(stbi *s, stbi_uc *buffer, int n)
447{
448#ifndef STBI_NO_STDIO
449 if (s->img_file) {
450 fread(buffer, 1, n, s->img_file);
451 return;
452 }
453#endif
454 memcpy(buffer, s->img_buffer, n);
455 s->img_buffer += n;
456}
457
458//////////////////////////////////////////////////////////////////////////////
459//
460// generic converter from built-in img_n to req_comp
461// individual types do this automatically as much as possible (e.g. jpeg
462// does all cases internally since it needs to colorspace convert anyway,
463// and it never has alpha, so very few cases ). png can automatically
464// interleave an alpha=255 channel, but falls back to this for other cases
465//
466// assumes the data buffer was malloced: a new buffer is malloced and the old one is freed;
467// the only failure mode is malloc failing
468
469static uint8 compute_y(int r, int g, int b)
470{
471 return (uint8) (((r*77) + (g*150) + (29*b)) >> 8);
472}
473
474static unsigned char *convert_format(unsigned char *data, int img_n, int req_comp, uint x, uint y)
475{
476 int i,j;
477 unsigned char *good;
478
479 if (req_comp == img_n) return data;
480 assert(req_comp >= 1 && req_comp <= 4)((void) (0));
481
482 good = (unsigned char *) malloc(req_comp * x * y);
483 if (good == NULL((void*)0)) {
484 free(data);
485 return epuc("outofmem", "Out of memory")((unsigned char *) (e("Out of memory")?((void*)0):((void*)0))
)
;
486 }
487
488 for (j=0; j < (int) y; ++j) {
489 unsigned char *src = data + j * x * img_n ;
490 unsigned char *dest = good + j * x * req_comp;
491
492 #define COMBO(a,b)((a)*8 +(b)) ((a)*8+(b))
493 #define CASE(a,b) case COMBO(a,b)((a)*8 +(b)): for(i=x-1; i >= 0; --i, src += a, dest += b)
494 // convert source image with img_n components to one with req_comp components;
495 // avoid switch per pixel, so use switch per scanline and massive macros
496 switch(COMBO(img_n, req_comp)((img_n)*8 +(req_comp))) {
497 CASE(1,2) dest[0]=src[0], dest[1]=255; break;
498 CASE(1,3) dest[0]=dest[1]=dest[2]=src[0]; break;
499 CASE(1,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=255; break;
500 CASE(2,1) dest[0]=src[0]; break;
501 CASE(2,3) dest[0]=dest[1]=dest[2]=src[0]; break;
502 CASE(2,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; break;
503 CASE(3,4) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255; break;
504 CASE(3,1) dest[0]=compute_y(src[0],src[1],src[2]); break;
505 CASE(3,2) dest[0]=compute_y(src[0],src[1],src[2]), dest[1] = 255; break;
506 CASE(4,1) dest[0]=compute_y(src[0],src[1],src[2]); break;
507 CASE(4,2) dest[0]=compute_y(src[0],src[1],src[2]), dest[1] = src[3]; break;
508 CASE(4,3) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; break;
509 default: assert(0)((void) (0));
510 }
511 #undef CASE
512 }
513
514 free(data);
515 return good;
516}
517
518#ifndef STBI_NO_HDR
519static float *ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
520{
521 int i,k,n;
522 float *output = (float *) malloc(x * y * comp * sizeof(float));
523 if (output == NULL((void*)0)) { free(data); return epf("outofmem", "Out of memory")((float *) (e("Out of memory")?((void*)0):((void*)0))); }
524 // compute number of non-alpha components
525 if (comp & 1) n = comp; else n = comp-1;
526 for (i=0; i < x*y; ++i) {
527 for (k=0; k < n; ++k) {
528 output[i*comp + k] = (float) pow(data[i*comp+k]/255.0f, l2h_gamma) * l2h_scale;
529 }
530 if (k < comp) output[i*comp + k] = data[i*comp+k]/255.0f;
531 }
532 free(data);
533 return output;
534}
535
536#define float2int(x)((int) (x)) ((int) (x))
537static stbi_uc *hdr_to_ldr(float *data, int x, int y, int comp)
538{
539 int i,k,n;
540 stbi_uc *output = (stbi_uc *) malloc(x * y * comp);
541 if (output == NULL((void*)0)) { free(data); return epuc("outofmem", "Out of memory")((unsigned char *) (e("Out of memory")?((void*)0):((void*)0))
)
; }
542 // compute number of non-alpha components
543 if (comp & 1) n = comp; else n = comp-1;
544 for (i=0; i < x*y; ++i) {
545 for (k=0; k < n; ++k) {
546 float z = (float) pow(data[i*comp+k]*h2l_scale_i, h2l_gamma_i) * 255 + 0.5f;
547 if (z < 0) z = 0;
548 if (z > 255) z = 255;
549 output[i*comp + k] = float2int(z)((int) (z));
550 }
551 if (k < comp) {
552 float z = data[i*comp+k] * 255 + 0.5f;
553 if (z < 0) z = 0;
554 if (z > 255) z = 255;
555 output[i*comp + k] = float2int(z)((int) (z));
556 }
557 }
558 free(data);
559 return output;
560}
561#endif
562
563//////////////////////////////////////////////////////////////////////////////
564//
565// "baseline" JPEG/JFIF decoder (not actually fully baseline implementation)
566//
567// simple implementation
568// - channel subsampling of at most 2 in each dimension
569// - doesn't support delayed output of y-dimension
570// - simple interface (only one output format: 8-bit interleaved RGB)
571// - doesn't try to recover corrupt jpegs
572// - doesn't allow partial loading, loading multiple at once
573// - still fast on x86 (copying globals into locals doesn't help x86)
574// - allocates lots of intermediate memory (full size of all components)
575// - non-interleaved case requires this anyway
576// - allows good upsampling (see next)
577// high-quality
578// - upsampled channels are bilinearly interpolated, even across blocks
579// - quality integer IDCT derived from IJG's 'slow'
580// performance
581// - fast huffman; reasonable integer IDCT
582// - uses a lot of intermediate memory, could cache poorly
583// - load http://nothings.org/remote/anemones.jpg 3 times on 2.8Ghz P4
584// stb_jpeg: 1.34 seconds (MSVC6, default release build)
585// stb_jpeg: 1.06 seconds (MSVC6, processor = Pentium Pro)
586// IJL11.dll: 1.08 seconds (compiled by intel)
587// IJG 1998: 0.98 seconds (MSVC6, makefile provided by IJG)
588// IJG 1998: 0.95 seconds (MSVC6, makefile + proc=PPro)
589
590// huffman decoding acceleration
591#define FAST_BITS9 9 // larger handles more cases; smaller stomps less cache
592
593typedef struct
594{
595 uint8 fast[1 << FAST_BITS9];
596 // weirdly, repacking this into AoS is a 10% speed loss, instead of a win
597 uint16 code[256];
598 uint8 values[256];
599 uint8 size[257];
600 unsigned int maxcode[18];
601 int delta[17]; // old 'firstsymbol' - old 'firstcode'
602} huffman;
603
604typedef struct
605{
606 #if STBI_SIMD
607 unsigned short dequant2[4][64];
608 #endif
609 stbi s;
610 huffman huff_dc[4];
611 huffman huff_ac[4];
612 uint8 dequant[4][64];
613
614// sizes for components, interleaved MCUs
615 int img_h_max, img_v_max;
616 int img_mcu_x, img_mcu_y;
617 int img_mcu_w, img_mcu_h;
618
619// definition of jpeg image component
620 struct
621 {
622 int id;
623 int h,v;
624 int tq;
625 int hd,ha;
626 int dc_pred;
627
628 int x,y,w2,h2;
629 uint8 *data;
630 void *raw_data;
631 uint8 *linebuf;
632 } img_comp[4];
633
634 uint32 code_buffer; // jpeg entropy-coded buffer
635 int code_bits; // number of valid bits
636 unsigned char marker; // marker seen while filling entropy buffer
637 int nomore; // flag if we saw a marker so must stop
638
639 int scan_n, order[4];
640 int restart_interval, todo;
641} jpeg;
642
643static int build_huffman(huffman *h, int *count)
644{
645 int i,j,k=0,code;
646 // build size list for each symbol (from JPEG spec)
647 for (i=0; i < 16; ++i)
648 for (j=0; j < count[i]; ++j)
649 h->size[k++] = (uint8) (i+1);
650 h->size[k] = 0;
651
652 // compute actual symbols (from jpeg spec)
653 code = 0;
654 k = 0;
655 for(j=1; j <= 16; ++j) {
656 // compute delta to add to code to compute symbol id
657 h->delta[j] = k - code;
658 if (h->size[k] == j) {
659 while (h->size[k] == j)
660 h->code[k++] = (uint16) (code++);
661 if (code-1 >= (1 << j)) return e("bad code lengths","Corrupt JPEG")e("Corrupt JPEG");
662 }
663 // compute largest code + 1 for this size, preshifted as needed later
664 h->maxcode[j] = code << (16-j);
665 code <<= 1;
666 }
667 h->maxcode[j] = 0xffffffff;
668
669 // build non-spec acceleration table; 255 is flag for not-accelerated
670 memset(h->fast, 255, 1 << FAST_BITS9);
671 for (i=0; i < k; ++i) {
672 int s = h->size[i];
673 if (s <= FAST_BITS9) {
674 int c = h->code[i] << (FAST_BITS9-s);
675 int m = 1 << (FAST_BITS9-s);
676 for (j=0; j < m; ++j) {
677 h->fast[c+j] = (uint8) i;
678 }
679 }
680 }
681 return 1;
682}
683
684static void grow_buffer_unsafe(jpeg *j)
685{
686 do {
687 int b = j->nomore ? 0 : get8(&j->s);
688 if (b == 0xff) {
689 int c = get8(&j->s);
690 if (c != 0) {
691 j->marker = (unsigned char) c;
692 j->nomore = 1;
693 return;
694 }
695 }
696 j->code_buffer = (j->code_buffer << 8) | b;
697 j->code_bits += 8;
698 } while (j->code_bits <= 24);
699}
700
701// (1 << n) - 1
702static uint32 bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535};
703
704// decode a jpeg huffman value from the bitstream
705__forceinline static int decode(jpeg *j, huffman *h)
706{
707 unsigned int temp;
708 int c,k;
709
710 if (j->code_bits < 16) grow_buffer_unsafe(j);
711
712 // look at the top FAST_BITS and determine what symbol ID it is,
713 // if the code is <= FAST_BITS
714 c = (j->code_buffer >> (j->code_bits - FAST_BITS9)) & ((1 << FAST_BITS9)-1);
715 k = h->fast[c];
716 if (k < 255) {
717 if (h->size[k] > j->code_bits)
718 return -1;
719 j->code_bits -= h->size[k];
720 return h->values[k];
721 }
722
723 // naive test is to shift the code_buffer down so k bits are
724 // valid, then test against maxcode. To speed this up, we've
725 // preshifted maxcode left so that it has (16-k) 0s at the
726 // end; in other words, regardless of the number of bits, it
727 // wants to be compared against something shifted to have 16;
728 // that way we don't need to shift inside the loop.
729 if (j->code_bits < 16)
730 temp = (j->code_buffer << (16 - j->code_bits)) & 0xffff;
731 else
732 temp = (j->code_buffer >> (j->code_bits - 16)) & 0xffff;
733 for (k=FAST_BITS9+1 ; ; ++k)
734 if (temp < h->maxcode[k])
735 break;
736 if (k == 17) {
737 // error! code not found
738 j->code_bits -= 16;
739 return -1;
740 }
741
742 if (k > j->code_bits)
743 return -1;
744
745 // convert the huffman code to the symbol id
746 c = ((j->code_buffer >> (j->code_bits - k)) & bmask[k]) + h->delta[k];
747 assert((((j->code_buffer) >> (j->code_bits - h->size[c])) & bmask[h->size[c]]) == h->code[c])((void) (0));
748
749 // convert the id to a symbol
750 j->code_bits -= k;
751 return h->values[c];
752}
753
754// combined JPEG 'receive' and JPEG 'extend', since baseline
755// always extends everything it receives.
756__forceinline static int extend_receive(jpeg *j, int n)
757{
758 unsigned int m = 1 << (n-1);
759 unsigned int k;
760 if (j->code_bits < n) grow_buffer_unsafe(j);
761 k = (j->code_buffer >> (j->code_bits - n)) & bmask[n];
762 j->code_bits -= n;
763 // the following test is probably a random branch that won't
764 // predict well. I tried to table accelerate it but failed.
765 // maybe it's compiling as a conditional move?
766 if (k < m)
767 return (-1 << n) + k + 1;
768 else
769 return k;
770}
771
772// given a value that's at position X in the zigzag stream,
773// where does it appear in the 8x8 matrix coded as row-major?
774static uint8 dezigzag[64+15] =
775{
776 0, 1, 8, 16, 9, 2, 3, 10,
777 17, 24, 32, 25, 18, 11, 4, 5,
778 12, 19, 26, 33, 40, 48, 41, 34,
779 27, 20, 13, 6, 7, 14, 21, 28,
780 35, 42, 49, 56, 57, 50, 43, 36,
781 29, 22, 15, 23, 30, 37, 44, 51,
782 58, 59, 52, 45, 38, 31, 39, 46,
783 53, 60, 61, 54, 47, 55, 62, 63,
784 // let corrupt input sample past end
785 63, 63, 63, 63, 63, 63, 63, 63,
786 63, 63, 63, 63, 63, 63, 63
787};
788
789// decode one 64-entry block--
790static int decode_block(jpeg *j, short data[64], huffman *hdc, huffman *hac, int b)
791{
792 int diff,dc,k;
793 int t = decode(j, hdc);
794 if (t < 0) return e("bad huffman code","Corrupt JPEG")e("Corrupt JPEG");
795
796 // 0 all the ac values now so we can do it 32-bits at a time
797 memset(data,0,64*sizeof(data[0]));
798
799 diff = t ? extend_receive(j, t) : 0;
800 dc = j->img_comp[b].dc_pred + diff;
801 j->img_comp[b].dc_pred = dc;
802 data[0] = (short) dc;
803
804 // decode AC components, see JPEG spec
805 k = 1;
806 do {
807 int r,s;
808 int rs = decode(j, hac);
809 if (rs < 0) return e("bad huffman code","Corrupt JPEG")e("Corrupt JPEG");
810 s = rs & 15;
811 r = rs >> 4;
812 if (s == 0) {
813 if (rs != 0xf0) break; // end block
814 k += 16;
815 } else {
816 k += r;
817 // decode into unzigzag'd location
818 data[dezigzag[k++]] = (short) extend_receive(j,s);
819 }
820 } while (k < 64);
821 return 1;
822}
823
824// take a -128..127 value and clamp it and convert to 0..255
825__forceinline static uint8 clamp(int x)
826{
827 x += 128;
828 // trick to use a single test to catch both cases
829 if ((unsigned int) x > 255) {
830 if (x < 0) return 0;
831 if (x > 255) return 255;
832 }
833 return (uint8) x;
834}
835
836#define f2f(x)(int) (((x) * 4096 + 0.5)) (int) (((x) * 4096 + 0.5))
837#define fsh(x)((x) << 12) ((x) << 12)
838
839// derived from jidctint -- DCT_ISLOW
840#define IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7)int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; p2 = s2; p3 = s6;
p1 = (p2+p3) * (int) (((0.5411961f) * 4096 + 0.5)); t2 = p1 +
p3*(int) (((-1.847759065f) * 4096 + 0.5)); t3 = p1 + p2*(int
) (((0.765366865f) * 4096 + 0.5)); p2 = s0; p3 = s4; t0 = ((p2
+p3) << 12); t1 = ((p2-p3) << 12); x0 = t0+t3; x3
= t0-t3; x1 = t1+t2; x2 = t1-t2; t0 = s7; t1 = s5; t2 = s3; t3
= s1; p3 = t0+t2; p4 = t1+t3; p1 = t0+t3; p2 = t1+t2; p5 = (
p3+p4)*(int) (((1.175875602f) * 4096 + 0.5)); t0 = t0*(int) (
((0.298631336f) * 4096 + 0.5)); t1 = t1*(int) (((2.053119869f
) * 4096 + 0.5)); t2 = t2*(int) (((3.072711026f) * 4096 + 0.5
)); t3 = t3*(int) (((1.501321110f) * 4096 + 0.5)); p1 = p5 + p1
*(int) (((-0.899976223f) * 4096 + 0.5)); p2 = p5 + p2*(int) (
((-2.562915447f) * 4096 + 0.5)); p3 = p3*(int) (((-1.961570560f
) * 4096 + 0.5)); p4 = p4*(int) (((-0.390180644f) * 4096 + 0.5
)); t3 += p1+p4; t2 += p2+p3; t1 += p2+p4; t0 += p1+p3;
\
841 int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \
842 p2 = s2; \
843 p3 = s6; \
844 p1 = (p2+p3) * f2f(0.5411961f)(int) (((0.5411961f) * 4096 + 0.5)); \
845 t2 = p1 + p3*f2f(-1.847759065f)(int) (((-1.847759065f) * 4096 + 0.5)); \
846 t3 = p1 + p2*f2f( 0.765366865f)(int) (((0.765366865f) * 4096 + 0.5)); \
847 p2 = s0; \
848 p3 = s4; \
849 t0 = fsh(p2+p3)((p2+p3) << 12); \
850 t1 = fsh(p2-p3)((p2-p3) << 12); \
851 x0 = t0+t3; \
852 x3 = t0-t3; \
853 x1 = t1+t2; \
854 x2 = t1-t2; \
855 t0 = s7; \
856 t1 = s5; \
857 t2 = s3; \
858 t3 = s1; \
859 p3 = t0+t2; \
860 p4 = t1+t3; \
861 p1 = t0+t3; \
862 p2 = t1+t2; \
863 p5 = (p3+p4)*f2f( 1.175875602f)(int) (((1.175875602f) * 4096 + 0.5)); \
864 t0 = t0*f2f( 0.298631336f)(int) (((0.298631336f) * 4096 + 0.5)); \
865 t1 = t1*f2f( 2.053119869f)(int) (((2.053119869f) * 4096 + 0.5)); \
866 t2 = t2*f2f( 3.072711026f)(int) (((3.072711026f) * 4096 + 0.5)); \
867 t3 = t3*f2f( 1.501321110f)(int) (((1.501321110f) * 4096 + 0.5)); \
868 p1 = p5 + p1*f2f(-0.899976223f)(int) (((-0.899976223f) * 4096 + 0.5)); \
869 p2 = p5 + p2*f2f(-2.562915447f)(int) (((-2.562915447f) * 4096 + 0.5)); \
870 p3 = p3*f2f(-1.961570560f)(int) (((-1.961570560f) * 4096 + 0.5)); \
871 p4 = p4*f2f(-0.390180644f)(int) (((-0.390180644f) * 4096 + 0.5)); \
872 t3 += p1+p4; \
873 t2 += p2+p3; \
874 t1 += p2+p4; \
875 t0 += p1+p3;
876
877#if !STBI_SIMD
878// .344 seconds on 3*anemones.jpg
879static void idct_block(uint8 *out, int out_stride, short data[64], uint8 *dequantize)
880{
881 int i,val[64],*v=val;
882 uint8 *o,*dq = dequantize;
883 short *d = data;
884
885 // columns
886 for (i=0; i < 8; ++i,++d,++dq, ++v) {
887 // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
888 if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
889 && d[40]==0 && d[48]==0 && d[56]==0) {
890 // no shortcut 0 seconds
891 // (1|2|3|4|5|6|7)==0 0 seconds
892 // all separate -0.047 seconds
893 // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds
894 int dcterm = d[0] * dq[0] << 2;
895 v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
896 } else {
897 IDCT_1D(d[ 0]*dq[ 0],d[ 8]*dq[ 8],d[16]*dq[16],d[24]*dq[24],int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; p2 = d[16]*dq[16]
; p3 = d[48]*dq[48]; p1 = (p2+p3) * (int) (((0.5411961f) * 4096
+ 0.5)); t2 = p1 + p3*(int) (((-1.847759065f) * 4096 + 0.5))
; t3 = p1 + p2*(int) (((0.765366865f) * 4096 + 0.5)); p2 = d[
0]*dq[ 0]; p3 = d[32]*dq[32]; t0 = ((p2+p3) << 12); t1
= ((p2-p3) << 12); x0 = t0+t3; x3 = t0-t3; x1 = t1+t2;
x2 = t1-t2; t0 = d[56]*dq[56]; t1 = d[40]*dq[40]; t2 = d[24]
*dq[24]; t3 = d[ 8]*dq[ 8]; p3 = t0+t2; p4 = t1+t3; p1 = t0+t3
; p2 = t1+t2; p5 = (p3+p4)*(int) (((1.175875602f) * 4096 + 0.5
)); t0 = t0*(int) (((0.298631336f) * 4096 + 0.5)); t1 = t1*(int
) (((2.053119869f) * 4096 + 0.5)); t2 = t2*(int) (((3.072711026f
) * 4096 + 0.5)); t3 = t3*(int) (((1.501321110f) * 4096 + 0.5
)); p1 = p5 + p1*(int) (((-0.899976223f) * 4096 + 0.5)); p2 =
p5 + p2*(int) (((-2.562915447f) * 4096 + 0.5)); p3 = p3*(int
) (((-1.961570560f) * 4096 + 0.5)); p4 = p4*(int) (((-0.390180644f
) * 4096 + 0.5)); t3 += p1+p4; t2 += p2+p3; t1 += p2+p4; t0 +=
p1+p3;
898 d[32]*dq[32],d[40]*dq[40],d[48]*dq[48],d[56]*dq[56])int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; p2 = d[16]*dq[16]
; p3 = d[48]*dq[48]; p1 = (p2+p3) * (int) (((0.5411961f) * 4096
+ 0.5)); t2 = p1 + p3*(int) (((-1.847759065f) * 4096 + 0.5))
; t3 = p1 + p2*(int) (((0.765366865f) * 4096 + 0.5)); p2 = d[
0]*dq[ 0]; p3 = d[32]*dq[32]; t0 = ((p2+p3) << 12); t1
= ((p2-p3) << 12); x0 = t0+t3; x3 = t0-t3; x1 = t1+t2;
x2 = t1-t2; t0 = d[56]*dq[56]; t1 = d[40]*dq[40]; t2 = d[24]
*dq[24]; t3 = d[ 8]*dq[ 8]; p3 = t0+t2; p4 = t1+t3; p1 = t0+t3
; p2 = t1+t2; p5 = (p3+p4)*(int) (((1.175875602f) * 4096 + 0.5
)); t0 = t0*(int) (((0.298631336f) * 4096 + 0.5)); t1 = t1*(int
) (((2.053119869f) * 4096 + 0.5)); t2 = t2*(int) (((3.072711026f
) * 4096 + 0.5)); t3 = t3*(int) (((1.501321110f) * 4096 + 0.5
)); p1 = p5 + p1*(int) (((-0.899976223f) * 4096 + 0.5)); p2 =
p5 + p2*(int) (((-2.562915447f) * 4096 + 0.5)); p3 = p3*(int
) (((-1.961570560f) * 4096 + 0.5)); p4 = p4*(int) (((-0.390180644f
) * 4096 + 0.5)); t3 += p1+p4; t2 += p2+p3; t1 += p2+p4; t0 +=
p1+p3;
899 // constants scaled things up by 1<<12; let's bring them back
900 // down, but keep 2 extra bits of precision
901 x0 += 512; x1 += 512; x2 += 512; x3 += 512;
902 v[ 0] = (x0+t3) >> 10;
903 v[56] = (x0-t3) >> 10;
904 v[ 8] = (x1+t2) >> 10;
905 v[48] = (x1-t2) >> 10;
906 v[16] = (x2+t1) >> 10;
907 v[40] = (x2-t1) >> 10;
908 v[24] = (x3+t0) >> 10;
909 v[32] = (x3-t0) >> 10;
910 }
911 }
912
913 for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) {
914 // no fast case since the first 1D IDCT spread components out
915 IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7])int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; p2 = v[2]; p3 = v
[6]; p1 = (p2+p3) * (int) (((0.5411961f) * 4096 + 0.5)); t2 =
p1 + p3*(int) (((-1.847759065f) * 4096 + 0.5)); t3 = p1 + p2
*(int) (((0.765366865f) * 4096 + 0.5)); p2 = v[0]; p3 = v[4];
t0 = ((p2+p3) << 12); t1 = ((p2-p3) << 12); x0 =
t0+t3; x3 = t0-t3; x1 = t1+t2; x2 = t1-t2; t0 = v[7]; t1 = v
[5]; t2 = v[3]; t3 = v[1]; p3 = t0+t2; p4 = t1+t3; p1 = t0+t3
; p2 = t1+t2; p5 = (p3+p4)*(int) (((1.175875602f) * 4096 + 0.5
)); t0 = t0*(int) (((0.298631336f) * 4096 + 0.5)); t1 = t1*(int
) (((2.053119869f) * 4096 + 0.5)); t2 = t2*(int) (((3.072711026f
) * 4096 + 0.5)); t3 = t3*(int) (((1.501321110f) * 4096 + 0.5
)); p1 = p5 + p1*(int) (((-0.899976223f) * 4096 + 0.5)); p2 =
p5 + p2*(int) (((-2.562915447f) * 4096 + 0.5)); p3 = p3*(int
) (((-1.961570560f) * 4096 + 0.5)); p4 = p4*(int) (((-0.390180644f
) * 4096 + 0.5)); t3 += p1+p4; t2 += p2+p3; t1 += p2+p4; t0 +=
p1+p3;
916 // constants scaled things up by 1<<12, plus we had 1<<2 from first
917 // loop, plus horizontal and vertical each scale by sqrt(8) so together
918 // we've got an extra 1<<3, so 1<<17 total we need to remove.
919 x0 += 65536; x1 += 65536; x2 += 65536; x3 += 65536;
920 o[0] = clamp((x0+t3) >> 17);
921 o[7] = clamp((x0-t3) >> 17);
922 o[1] = clamp((x1+t2) >> 17);
923 o[6] = clamp((x1-t2) >> 17);
924 o[2] = clamp((x2+t1) >> 17);
925 o[5] = clamp((x2-t1) >> 17);
926 o[3] = clamp((x3+t0) >> 17);
927 o[4] = clamp((x3-t0) >> 17);
928 }
929}
930#else
931static void idct_block(uint8 *out, int out_stride, short data[64], unsigned short *dequantize)
932{
933 int i,val[64],*v=val;
934 uint8 *o;
935 unsigned short *dq = dequantize;
936 short *d = data;
937
938 // columns
939 for (i=0; i < 8; ++i,++d,++dq, ++v) {
940 // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
941 if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
942 && d[40]==0 && d[48]==0 && d[56]==0) {
943 // no shortcut 0 seconds
944 // (1|2|3|4|5|6|7)==0 0 seconds
945 // all separate -0.047 seconds
946 // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds
947 int dcterm = d[0] * dq[0] << 2;
948 v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
949 } else {
950 IDCT_1D(d[ 0]*dq[ 0],d[ 8]*dq[ 8],d[16]*dq[16],d[24]*dq[24],int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; p2 = d[16]*dq[16]
; p3 = d[48]*dq[48]; p1 = (p2+p3) * (int) (((0.5411961f) * 4096
+ 0.5)); t2 = p1 + p3*(int) (((-1.847759065f) * 4096 + 0.5))
; t3 = p1 + p2*(int) (((0.765366865f) * 4096 + 0.5)); p2 = d[
0]*dq[ 0]; p3 = d[32]*dq[32]; t0 = ((p2+p3) << 12); t1
= ((p2-p3) << 12); x0 = t0+t3; x3 = t0-t3; x1 = t1+t2;
x2 = t1-t2; t0 = d[56]*dq[56]; t1 = d[40]*dq[40]; t2 = d[24]
*dq[24]; t3 = d[ 8]*dq[ 8]; p3 = t0+t2; p4 = t1+t3; p1 = t0+t3
; p2 = t1+t2; p5 = (p3+p4)*(int) (((1.175875602f) * 4096 + 0.5
)); t0 = t0*(int) (((0.298631336f) * 4096 + 0.5)); t1 = t1*(int
) (((2.053119869f) * 4096 + 0.5)); t2 = t2*(int) (((3.072711026f
) * 4096 + 0.5)); t3 = t3*(int) (((1.501321110f) * 4096 + 0.5
)); p1 = p5 + p1*(int) (((-0.899976223f) * 4096 + 0.5)); p2 =
p5 + p2*(int) (((-2.562915447f) * 4096 + 0.5)); p3 = p3*(int
) (((-1.961570560f) * 4096 + 0.5)); p4 = p4*(int) (((-0.390180644f
) * 4096 + 0.5)); t3 += p1+p4; t2 += p2+p3; t1 += p2+p4; t0 +=
p1+p3;
951 d[32]*dq[32],d[40]*dq[40],d[48]*dq[48],d[56]*dq[56])int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; p2 = d[16]*dq[16]
; p3 = d[48]*dq[48]; p1 = (p2+p3) * (int) (((0.5411961f) * 4096
+ 0.5)); t2 = p1 + p3*(int) (((-1.847759065f) * 4096 + 0.5))
; t3 = p1 + p2*(int) (((0.765366865f) * 4096 + 0.5)); p2 = d[
0]*dq[ 0]; p3 = d[32]*dq[32]; t0 = ((p2+p3) << 12); t1
= ((p2-p3) << 12); x0 = t0+t3; x3 = t0-t3; x1 = t1+t2;
x2 = t1-t2; t0 = d[56]*dq[56]; t1 = d[40]*dq[40]; t2 = d[24]
*dq[24]; t3 = d[ 8]*dq[ 8]; p3 = t0+t2; p4 = t1+t3; p1 = t0+t3
; p2 = t1+t2; p5 = (p3+p4)*(int) (((1.175875602f) * 4096 + 0.5
)); t0 = t0*(int) (((0.298631336f) * 4096 + 0.5)); t1 = t1*(int
) (((2.053119869f) * 4096 + 0.5)); t2 = t2*(int) (((3.072711026f
) * 4096 + 0.5)); t3 = t3*(int) (((1.501321110f) * 4096 + 0.5
)); p1 = p5 + p1*(int) (((-0.899976223f) * 4096 + 0.5)); p2 =
p5 + p2*(int) (((-2.562915447f) * 4096 + 0.5)); p3 = p3*(int
) (((-1.961570560f) * 4096 + 0.5)); p4 = p4*(int) (((-0.390180644f
) * 4096 + 0.5)); t3 += p1+p4; t2 += p2+p3; t1 += p2+p4; t0 +=
p1+p3;
952 // constants scaled things up by 1<<12; let's bring them back
953 // down, but keep 2 extra bits of precision
954 x0 += 512; x1 += 512; x2 += 512; x3 += 512;
955 v[ 0] = (x0+t3) >> 10;
956 v[56] = (x0-t3) >> 10;
957 v[ 8] = (x1+t2) >> 10;
958 v[48] = (x1-t2) >> 10;
959 v[16] = (x2+t1) >> 10;
960 v[40] = (x2-t1) >> 10;
961 v[24] = (x3+t0) >> 10;
962 v[32] = (x3-t0) >> 10;
963 }
964 }
965
966 for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) {
967 // no fast case since the first 1D IDCT spread components out
968 IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7])int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; p2 = v[2]; p3 = v
[6]; p1 = (p2+p3) * (int) (((0.5411961f) * 4096 + 0.5)); t2 =
p1 + p3*(int) (((-1.847759065f) * 4096 + 0.5)); t3 = p1 + p2
*(int) (((0.765366865f) * 4096 + 0.5)); p2 = v[0]; p3 = v[4];
t0 = ((p2+p3) << 12); t1 = ((p2-p3) << 12); x0 =
t0+t3; x3 = t0-t3; x1 = t1+t2; x2 = t1-t2; t0 = v[7]; t1 = v
[5]; t2 = v[3]; t3 = v[1]; p3 = t0+t2; p4 = t1+t3; p1 = t0+t3
; p2 = t1+t2; p5 = (p3+p4)*(int) (((1.175875602f) * 4096 + 0.5
)); t0 = t0*(int) (((0.298631336f) * 4096 + 0.5)); t1 = t1*(int
) (((2.053119869f) * 4096 + 0.5)); t2 = t2*(int) (((3.072711026f
) * 4096 + 0.5)); t3 = t3*(int) (((1.501321110f) * 4096 + 0.5
)); p1 = p5 + p1*(int) (((-0.899976223f) * 4096 + 0.5)); p2 =
p5 + p2*(int) (((-2.562915447f) * 4096 + 0.5)); p3 = p3*(int
) (((-1.961570560f) * 4096 + 0.5)); p4 = p4*(int) (((-0.390180644f
) * 4096 + 0.5)); t3 += p1+p4; t2 += p2+p3; t1 += p2+p4; t0 +=
p1+p3;
969 // constants scaled things up by 1<<12, plus we had 1<<2 from first
970 // loop, plus horizontal and vertical each scale by sqrt(8) so together
971 // we've got an extra 1<<3, so 1<<17 total we need to remove.
972 x0 += 65536; x1 += 65536; x2 += 65536; x3 += 65536;
973 o[0] = clamp((x0+t3) >> 17);
974 o[7] = clamp((x0-t3) >> 17);
975 o[1] = clamp((x1+t2) >> 17);
976 o[6] = clamp((x1-t2) >> 17);
977 o[2] = clamp((x2+t1) >> 17);
978 o[5] = clamp((x2-t1) >> 17);
979 o[3] = clamp((x3+t0) >> 17);
980 o[4] = clamp((x3-t0) >> 17);
981 }
982}
983static stbi_idct_8x8 stbi_idct_installed = idct_block;
984
985extern void stbi_install_idct(stbi_idct_8x8 func)
986{
987 stbi_idct_installed = func;
988}
989#endif
990
991#define MARKER_none0xff 0xff
992// if there's a pending marker from the entropy stream, return that
993// otherwise, fetch from the stream and get a marker. if there's no
994// marker, return 0xff, which is never a valid marker value
995static uint8 get_marker(jpeg *j)
996{
997 uint8 x;
998 if (j->marker != MARKER_none0xff) { x = j->marker; j->marker = MARKER_none0xff; return x; }
999 x = get8u(&j->s);
1000 if (x != 0xff) return MARKER_none0xff;
1001 while (x == 0xff)
1002 x = get8u(&j->s);
1003 return x;
1004}
1005
1006// in each scan, we'll have scan_n components, and the order
1007// of the components is specified by order[]
// nonzero when marker code x lies in the restart-marker range 0xd0..0xd7
#define RESTART(x)     ((x) >= 0xd0 && (x) <= 0xd7)
1009
1010// after a restart interval, reset the entropy decoder and
1011// the dc prediction
1012static void reset(jpeg *j)
1013{
1014 j->code_bits = 0;
1015 j->code_buffer = 0;
1016 j->nomore = 0;
1017 j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = 0;
1018 j->marker = MARKER_none0xff;
1019 j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;
1020 // no more than 1<<31 MCUs if no restart_interal? that's plenty safe,
1021 // since we don't even allow 1<<30 pixels
1022}
1023
1024static int parse_entropy_coded_data(jpeg *z)
1025{
1026 reset(z);
1027 if (z->scan_n == 1) {
1028 int i,j;
1029 #if STBI_SIMD
1030 __declspec(align(16))
1031 #endif
1032 short data[64];
1033 int n = z->order[0];
1034 // non-interleaved data, we just need to process one block at a time,
1035 // in trivial scanline order
1036 // number of blocks to do just depends on how many actual "pixels" this
1037 // component has, independent of interleaved MCU blocking and such
1038 int w = (z->img_comp[n].x+7) >> 3;
1039 int h = (z->img_comp[n].y+7) >> 3;
1040 for (j=0; j < h; ++j) {
1041 for (i=0; i < w; ++i) {
1042 if (!decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+z->img_comp[n].ha, n)) return 0;
1043 #if STBI_SIMD
1044 stbi_idct_installed(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data, z->dequant2[z->img_comp[n].tq]);
1045 #else
1046 idct_block(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data, z->dequant[z->img_comp[n].tq]);
1047 #endif
1048 // every data block is an MCU, so countdown the restart interval
1049 if (--z->todo <= 0) {
1050 if (z->code_bits < 24) grow_buffer_unsafe(z);
1051 // if it's NOT a restart, then just bail, so we get corrupt data
1052 // rather than no data
1053 if (!RESTART(z->marker)((z->marker) >= 0xd0 && (z->marker) <= 0xd7
)
) return 1;
1054 reset(z);
1055 }
1056 }
1057 }
1058 } else { // interleaved!
1059 int i,j,k,x,y;
1060 short data[64];
1061 for (j=0; j < z->img_mcu_y; ++j) {
1062 for (i=0; i < z->img_mcu_x; ++i) {
1063 // scan an interleaved mcu... process scan_n components in order
1064 for (k=0; k < z->scan_n; ++k) {
1065 int n = z->order[k];
1066 // scan out an mcu's worth of this component; that's just determined
1067 // by the basic H and V specified for the component
1068 for (y=0; y < z->img_comp[n].v; ++y) {
1069 for (x=0; x < z->img_comp[n].h; ++x) {
1070 int x2 = (i*z->img_comp[n].h + x)*8;
1071 int y2 = (j*z->img_comp[n].v + y)*8;
1072 if (!decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+z->img_comp[n].ha, n)) return 0;
1073 #if STBI_SIMD
1074 stbi_idct_installed(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data, z->dequant2[z->img_comp[n].tq]);
1075 #else
1076 idct_block(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data, z->dequant[z->img_comp[n].tq]);
1077 #endif
1078 }
1079 }
1080 }
1081 // after all interleaved components, that's an interleaved MCU,
1082 // so now count down the restart interval
1083 if (--z->todo <= 0) {
1084 if (z->code_bits < 24) grow_buffer_unsafe(z);
1085 // if it's NOT a restart, then just bail, so we get corrupt data
1086 // rather than no data
1087 if (!RESTART(z->marker)((z->marker) >= 0xd0 && (z->marker) <= 0xd7
)
) return 1;
1088 reset(z);
1089 }
1090 }
1091 }
1092 }
1093 return 1;
1094}
1095
1096static int process_marker(jpeg *z, int m)
1097{
1098 int L;
1099 switch (m) {
1100 case MARKER_none0xff: // no marker found
1101 return e("expected marker","Corrupt JPEG")e("Corrupt JPEG");
1102
1103 case 0xC2: // SOF - progressive
1104 return e("progressive jpeg","JPEG format not supported (progressive)")e("JPEG format not supported (progressive)");
1105
1106 case 0xDD: // DRI - specify restart interval
1107 if (get16(&z->s) != 4) return e("bad DRI len","Corrupt JPEG")e("Corrupt JPEG");
1108 z->restart_interval = get16(&z->s);
1109 return 1;
1110
1111 case 0xDB: // DQT - define quantization table
1112 L = get16(&z->s)-2;
1113 while (L > 0) {
1114 int q = get8(&z->s);
1115 int p = q >> 4;
1116 int t = q & 15,i;
1117 if (p != 0) return e("bad DQT type","Corrupt JPEG")e("Corrupt JPEG");
1118 if (t > 3) return e("bad DQT table","Corrupt JPEG")e("Corrupt JPEG");
1119 for (i=0; i < 64; ++i)
1120 z->dequant[t][dezigzag[i]] = get8u(&z->s);
1121 #if STBI_SIMD
1122 for (i=0; i < 64; ++i)
1123 z->dequant2[t][i] = z->dequant[t][i];
1124 #endif
1125 L -= 65;
1126 }
1127 return L==0;
1128
1129 case 0xC4: // DHT - define huffman table
1130 L = get16(&z->s)-2;
1131 while (L > 0) {
1132 uint8 *v;
1133 int sizes[16],i,m2=0;
1134 int q = get8(&z->s);
1135 int tc = q >> 4;
1136 int th = q & 15;
1137 if (tc > 1 || th > 3) return e("bad DHT header","Corrupt JPEG")e("Corrupt JPEG");
1138 for (i=0; i < 16; ++i) {
1139 sizes[i] = get8(&z->s);
1140 m2 += sizes[i];
1141 }
1142 L -= 17;
1143 if (tc == 0) {
1144 if (!build_huffman(z->huff_dc+th, sizes)) return 0;
1145 v = z->huff_dc[th].values;
1146 } else {
1147 if (!build_huffman(z->huff_ac+th, sizes)) return 0;
1148 v = z->huff_ac[th].values;
1149 }
1150 for (i=0; i < m2; ++i)
1151 v[i] = get8u(&z->s);
1152 L -= m2;
1153 }
1154 return L==0;
1155 }
1156 // check for comment block or APP blocks
1157 if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
1158 skip(&z->s, get16(&z->s)-2);
1159 return 1;
1160 }
1161 return 0;
1162}
1163
1164// after we see SOS
1165static int process_scan_header(jpeg *z)
1166{
1167 int i;
1168 int Ls = get16(&z->s);
1169 z->scan_n = get8(&z->s);
1170 if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s.img_n) return e("bad SOS component count","Corrupt JPEG")e("Corrupt JPEG");
1171 if (Ls != 6+2*z->scan_n) return e("bad SOS len","Corrupt JPEG")e("Corrupt JPEG");
1172 for (i=0; i < z->scan_n; ++i) {
1173 int ID = get8(&z->s), which;
1174 int q = get8(&z->s);
1175 for (which = 0; which < z->s.img_n; ++which)
1176 if (z->img_comp[which].id == ID)
1177 break;
1178 if (which == z->s.img_n) return 0;
1179 z->img_comp[which].hd = q >> 4; if (z->img_comp[which].hd > 3) return e("bad DC huff","Corrupt JPEG")e("Corrupt JPEG");
1180 z->img_comp[which].ha = q & 15; if (z->img_comp[which].ha > 3) return e("bad AC huff","Corrupt JPEG")e("Corrupt JPEG");
1181 z->order[i] = which;
1182 }
1183 if (get8(&z->s) != 0) return e("bad SOS","Corrupt JPEG")e("Corrupt JPEG");
1184 get8(&z->s); // should be 63, but might be 0
1185 if (get8(&z->s) != 0) return e("bad SOS","Corrupt JPEG")e("Corrupt JPEG");
1186
1187 return 1;
1188}
1189
1190static int process_frame_header(jpeg *z, int scan)
1191{
1192 stbi *s = &z->s;
1193 int Lf,p,i,q, h_max=1,v_max=1,c;
1194 Lf = get16(s); if (Lf < 11) return e("bad SOF len","Corrupt JPEG")e("Corrupt JPEG"); // JPEG
1195 p = get8(s); if (p != 8) return e("only 8-bit","JPEG format not supported: 8-bit only")e("JPEG format not supported: 8-bit only"); // JPEG baseline
1196 s->img_y = get16(s); if (s->img_y == 0) return e("no header height", "JPEG format not supported: delayed height")e("JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG
1197 s->img_x = get16(s); if (s->img_x == 0) return e("0 width","Corrupt JPEG")e("Corrupt JPEG"); // JPEG requires
1198 c = get8(s);
1199 if (c != 3 && c != 1) return e("bad component count","Corrupt JPEG")e("Corrupt JPEG"); // JFIF requires
1200 s->img_n = c;
1201 for (i=0; i < c; ++i) {
1202 z->img_comp[i].data = NULL((void*)0);
1203 z->img_comp[i].linebuf = NULL((void*)0);
1204 }
1205
1206 if (Lf != 8+3*s->img_n) return e("bad SOF len","Corrupt JPEG")e("Corrupt JPEG");
1207
1208 for (i=0; i < s->img_n; ++i) {
1209 z->img_comp[i].id = get8(s);
1210 if (z->img_comp[i].id != i+1) // JFIF requires
1211 if (z->img_comp[i].id != i) // some version of jpegtran outputs non-JFIF-compliant files!
1212 return e("bad component ID","Corrupt JPEG")e("Corrupt JPEG");
1213 q = get8(s);
1214 z->img_comp[i].h = (q >> 4); if (!z->img_comp[i].h || z->img_comp[i].h > 4) return e("bad H","Corrupt JPEG")e("Corrupt JPEG");
1215 z->img_comp[i].v = q & 15; if (!z->img_comp[i].v || z->img_comp[i].v > 4) return e("bad V","Corrupt JPEG")e("Corrupt JPEG");
1216 z->img_comp[i].tq = get8(s); if (z->img_comp[i].tq > 3) return e("bad TQ","Corrupt JPEG")e("Corrupt JPEG");
1217 }
1218
1219 if (scan != SCAN_load) return 1;
1220
1221 if ((1 << 30) / s->img_x / s->img_n < s->img_y) return e("too large", "Image too large to decode")e("Image too large to decode");
1222
1223 for (i=0; i < s->img_n; ++i) {
1224 if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h;
1225 if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v;
1226 }
1227
1228 // compute interleaved mcu info
1229 z->img_h_max = h_max;
1230 z->img_v_max = v_max;
1231 z->img_mcu_w = h_max * 8;
1232 z->img_mcu_h = v_max * 8;
1233 z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w;
1234 z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h;
1235
1236 for (i=0; i < s->img_n; ++i) {
1237 // number of effective pixels (e.g. for non-interleaved MCU)
1238 z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max;
1239 z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max;
1240 // to simplify generation, we'll allocate enough memory to decode
1241 // the bogus oversized data from using interleaved MCUs and their
1242 // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
1243 // discard the extra data until colorspace conversion
1244 z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
1245 z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
1246 z->img_comp[i].raw_data = malloc(z->img_comp[i].w2 * z->img_comp[i].h2+15);
1247 if (z->img_comp[i].raw_data == NULL((void*)0)) {
1248 for(--i; i >= 0; --i) {
1249 free(z->img_comp[i].raw_data);
1250 z->img_comp[i].data = NULL((void*)0);
1251 }
1252 return e("outofmem", "Out of memory")e("Out of memory");
1253 }
1254 // align blocks for installable-idct using mmx/sse
1255 z->img_comp[i].data = (uint8*) (((size_t) z->img_comp[i].raw_data + 15) & ~15);
1256 z->img_comp[i].linebuf = NULL((void*)0);
1257 }
1258
1259 return 1;
1260}
1261
// marker tests are written as predicates rather than constants because
// some of them accept more than one marker code (e.g. SOF)
#define DNL(x)         ((x) == 0xdc)
#define SOI(x)         ((x) == 0xd8)
#define EOI(x)         ((x) == 0xd9)
#define SOF(x)         ((x) == 0xc0 || (x) == 0xc1)
#define SOS(x)         ((x) == 0xda)
1268
1269static int decode_jpeg_header(jpeg *z, int scan)
1270{
1271 int m;
1272 z->marker = MARKER_none0xff; // initialize cached marker to empty
1273 m = get_marker(z);
1274 if (!SOI(m)((m) == 0xd8)) return e("no SOI","Corrupt JPEG")e("Corrupt JPEG");
1275 if (scan == SCAN_type) return 1;
1276 m = get_marker(z);
1277 while (!SOF(m)((m) == 0xc0 || (m) == 0xc1)) {
1278 if (!process_marker(z,m)) return 0;
1279 m = get_marker(z);
1280 while (m == MARKER_none0xff) {
1281 // some files have extra padding after their blocks, so ok, we'll scan
1282 if (at_eof(&z->s)) return e("no SOF", "Corrupt JPEG")e("Corrupt JPEG");
1283 m = get_marker(z);
1284 }
1285 }
1286 if (!process_frame_header(z, scan)) return 0;
1287 return 1;
1288}
1289
1290static int decode_jpeg_image(jpeg *j)
1291{
1292 int m;
1293 j->restart_interval = 0;
1294 if (!decode_jpeg_header(j, SCAN_load)) return 0;
1295 m = get_marker(j);
1296 while (!EOI(m)((m) == 0xd9)) {
1297 if (SOS(m)((m) == 0xda)) {
1298 if (!process_scan_header(j)) return 0;
1299 if (!parse_entropy_coded_data(j)) return 0;
1300 } else {
1301 if (!process_marker(j, m)) return 0;
1302 }
1303 m = get_marker(j);
1304 }
1305 return 1;
1306}
1307
1308// static jfif-centered resampling (across block boundaries)
1309
1310typedef uint8 *(*resample_row_func)(uint8 *out, uint8 *in0, uint8 *in1,
1311 int w, int hs);
1312
1313#define div4(x)((uint8) ((x) >> 2)) ((uint8) ((x) >> 2))
1314
1315static uint8 *resample_row_1(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs)
1316{
1317 return in_near;
1318}
1319
1320static uint8* resample_row_v_2(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs)
1321{
1322 // need to generate two samples vertically for every one in input
1323 int i;
1324 for (i=0; i < w; ++i)
1325 out[i] = div4(3*in_near[i] + in_far[i] + 2)((uint8) ((3*in_near[i] + in_far[i] + 2) >> 2));
1326 return out;
1327}
1328
1329static uint8* resample_row_h_2(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs)
1330{
1331 // need to generate two samples horizontally for every one in input
1332 int i;
1333 uint8 *input = in_near;
1334 if (w == 1) {
1335 // if only one sample, can't do any interpolation
1336 out[0] = out[1] = input[0];
1337 return out;
1338 }
1339
1340 out[0] = input[0];
1341 out[1] = div4(input[0]*3 + input[1] + 2)((uint8) ((input[0]*3 + input[1] + 2) >> 2));
1342 for (i=1; i < w-1; ++i) {
1343 int n = 3*input[i]+2;
1344 out[i*2+0] = div4(n+input[i-1])((uint8) ((n+input[i-1]) >> 2));
1345 out[i*2+1] = div4(n+input[i+1])((uint8) ((n+input[i+1]) >> 2));
1346 }
1347 out[i*2+0] = div4(input[w-2]*3 + input[w-1] + 2)((uint8) ((input[w-2]*3 + input[w-1] + 2) >> 2));
1348 out[i*2+1] = input[w-1];
1349 return out;
1350}
1351
1352#define div16(x)((uint8) ((x) >> 4)) ((uint8) ((x) >> 4))
1353
1354static uint8 *resample_row_hv_2(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs)
1355{
1356 // need to generate 2x2 samples for every one in input
1357 int i,t0,t1;
1358 if (w == 1) {
1359 out[0] = out[1] = div4(3*in_near[0] + in_far[0] + 2)((uint8) ((3*in_near[0] + in_far[0] + 2) >> 2));
1360 return out;
1361 }
1362
1363 t1 = 3*in_near[0] + in_far[0];
1364 out[0] = div4(t1+2)((uint8) ((t1+2) >> 2));
1365 for (i=1; i < w; ++i) {
1366 t0 = t1;
1367 t1 = 3*in_near[i]+in_far[i];
1368 out[i*2-1] = div16(3*t0 + t1 + 8)((uint8) ((3*t0 + t1 + 8) >> 4));
1369 out[i*2 ] = div16(3*t1 + t0 + 8)((uint8) ((3*t1 + t0 + 8) >> 4));
1370 }
1371 out[w*2-1] = div4(t1+2)((uint8) ((t1+2) >> 2));
1372 return out;
1373}
1374
1375static uint8 *resample_row_generic(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs)
1376{
1377 // resample with nearest-neighbor
1378 int i,j;
1379 for (i=0; i < w; ++i)
1380 for (j=0; j < hs; ++j)
1381 out[i*hs+j] = in_near[i];
1382 return out;
1383}
1384
1385#define float2fixed(x)((int) ((x) * 65536 + 0.5)) ((int) ((x) * 65536 + 0.5))
1386
1387// 0.38 seconds on 3*anemones.jpg (0.25 with processor = Pro)
1388// VC6 without processor=Pro is generating multiple LEAs per multiply!
1389static void YCbCr_to_RGB_row(uint8 *out, const uint8 *y, const uint8 *pcb, const uint8 *pcr, int count, int step)
1390{
1391 int i;
1392 for (i=0; i < count; ++i) {
1393 int y_fixed = (y[i] << 16) + 32768; // rounding
1394 int r,g,b;
1395 int cr = pcr[i] - 128;
1396 int cb = pcb[i] - 128;
1397 r = y_fixed + cr*float2fixed(1.40200f)((int) ((1.40200f) * 65536 + 0.5));
1398 g = y_fixed - cr*float2fixed(0.71414f)((int) ((0.71414f) * 65536 + 0.5)) - cb*float2fixed(0.34414f)((int) ((0.34414f) * 65536 + 0.5));
1399 b = y_fixed + cb*float2fixed(1.77200f)((int) ((1.77200f) * 65536 + 0.5));
1400 r >>= 16;
1401 g >>= 16;
1402 b >>= 16;
1403 if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
1404 if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
1405 if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
1406 out[0] = (uint8)r;
1407 out[1] = (uint8)g;
1408 out[2] = (uint8)b;
1409 out[3] = 255;
1410 out += step;
1411 }
1412}
1413
#if STBI_SIMD
// the row converter the decoder calls; starts out as YCbCr_to_RGB_row
static stbi_YCbCr_to_RGB_run stbi_YCbCr_installed = YCbCr_to_RGB_row;

// Make func the YCbCr-to-RGB row converter used from now on.
void stbi_install_YCbCr_to_RGB(stbi_YCbCr_to_RGB_run func)
{
   stbi_YCbCr_installed = func;
}
#endif
1422
1423
1424// clean up the temporary component buffers
1425static void cleanup_jpeg(jpeg *j)
1426{
1427 int i;
1428 for (i=0; i < j->s.img_n; ++i) {
1429 if (j->img_comp[i].data) {
1430 free(j->img_comp[i].raw_data);
1431 j->img_comp[i].data = NULL((void*)0);
1432 }
1433 if (j->img_comp[i].linebuf) {
1434 free(j->img_comp[i].linebuf);
1435 j->img_comp[i].linebuf = NULL((void*)0);
1436 }
1437 }
1438}
1439
1440typedef struct
1441{
1442 resample_row_func resample;
1443 uint8 *line0,*line1;
1444 int hs,vs; // expansion factor in each axis
1445 int w_lores; // horizontal pixels pre-expansion
1446 int ystep; // how far through vertical expansion we are
1447 int ypos; // which pre-expansion row we're on
1448} stbi_resample;
1449
1450static uint8 *load_jpeg_image(jpeg *z, int *out_x, int *out_y, int *comp, int req_comp)
1451{
1452 int n, decode_n;
1453 // validate req_comp
1454 if (req_comp < 0 || req_comp > 4) return epuc("bad req_comp", "Internal error")((unsigned char *) (e("Internal error")?((void*)0):((void*)0)
))
;
1455 z->s.img_n = 0;
1456
1457 // load a jpeg image from whichever source
1458 if (!decode_jpeg_image(z)) { cleanup_jpeg(z); return NULL((void*)0); }
1459
1460 // determine actual number of components to generate
1461 n = req_comp ? req_comp : z->s.img_n;
1462
1463 if (z->s.img_n == 3 && n < 3)
1464 decode_n = 1;
1465 else
1466 decode_n = z->s.img_n;
1467
1468 // resample and color-convert
1469 {
1470 int k;
1471 uint i,j;
1472 uint8 *output;
1473 uint8 *coutput[4];
1474
1475 stbi_resample res_comp[4];
1476
1477 for (k=0; k < decode_n; ++k) {
1478 stbi_resample *r = &res_comp[k];
1479
1480 // allocate line buffer big enough for upsampling off the edges
1481 // with upsample factor of 4
1482 z->img_comp[k].linebuf = (uint8 *) malloc(z->s.img_x + 3);
1483 if (!z->img_comp[k].linebuf) { cleanup_jpeg(z); return epuc("outofmem", "Out of memory")((unsigned char *) (e("Out of memory")?((void*)0):((void*)0))
)
; }
1484
1485 r->hs = z->img_h_max / z->img_comp[k].h;
1486 r->vs = z->img_v_max / z->img_comp[k].v;
1487 r->ystep = r->vs >> 1;
1488 r->w_lores = (z->s.img_x + r->hs-1) / r->hs;
1489 r->ypos = 0;
1490 r->line0 = r->line1 = z->img_comp[k].data;
1491
1492 if (r->hs == 1 && r->vs == 1) r->resample = resample_row_1;
1493 else if (r->hs == 1 && r->vs == 2) r->resample = resample_row_v_2;
1494 else if (r->hs == 2 && r->vs == 1) r->resample = resample_row_h_2;
1495 else if (r->hs == 2 && r->vs == 2) r->resample = resample_row_hv_2;
1496 else r->resample = resample_row_generic;
1497 }
1498
1499 // can't error after this so, this is safe
1500 output = (uint8 *) malloc(n * z->s.img_x * z->s.img_y + 1);
1501 if (!output) { cleanup_jpeg(z); return epuc("outofmem", "Out of memory")((unsigned char *) (e("Out of memory")?((void*)0):((void*)0))
)
; }
1502
1503 // now go ahead and resample
1504 for (j=0; j < z->s.img_y; ++j) {
1505 uint8 *out = output + n * z->s.img_x * j;
1506 for (k=0; k < decode_n; ++k) {
1507 stbi_resample *r = &res_comp[k];
1508 int y_bot = r->ystep >= (r->vs >> 1);
1509 coutput[k] = r->resample(z->img_comp[k].linebuf,
1510 y_bot ? r->line1 : r->line0,
1511 y_bot ? r->line0 : r->line1,
1512 r->w_lores, r->hs);
1513 if (++r->ystep >= r->vs) {
1514 r->ystep = 0;
1515 r->line0 = r->line1;
1516 if (++r->ypos < z->img_comp[k].y)
1517 r->line1 += z->img_comp[k].w2;
1518 }
1519 }
1520 if (n >= 3) {
1521 uint8 *y = coutput[0];
1522 if (z->s.img_n == 3) {
1523 #if STBI_SIMD
1524 stbi_YCbCr_installed(out, y, coutput[1], coutput[2], z->s.img_x, n);
1525 #else
1526 YCbCr_to_RGB_row(out, y, coutput[1], coutput[2], z->s.img_x, n);
1527 #endif
1528 } else
1529 for (i=0; i < z->s.img_x; ++i) {
1530 out[0] = out[1] = out[2] = y[i];
1531 out[3] = 255; // not used if n==3
1532 out += n;
1533 }
1534 } else {
1535 uint8 *y = coutput[0];
1536 if (n == 1)
1537 for (i=0; i < z->s.img_x; ++i) out[i] = y[i];
1538 else
1539 for (i=0; i < z->s.img_x; ++i) *out++ = y[i], *out++ = 255;
1540 }
1541 }
1542 cleanup_jpeg(z);
1543 *out_x = z->s.img_x;
1544 *out_y = z->s.img_y;
1545 if (comp) *comp = z->s.img_n; // report original components, not output
1546 return output;
1547 }
1548}
1549
1550#ifndef STBI_NO_STDIO
1551unsigned char *stbi_jpeg_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
1552{
1553 jpeg j;
1554 start_file(&j.s, f);
1555 return load_jpeg_image(&j, x,y,comp,req_comp);
1556}
1557
// Open filename in binary mode and decode it as a JPEG. Returns NULL if
// the file cannot be opened, otherwise whatever
// stbi_jpeg_load_from_file() returns. The file is closed before returning.
unsigned char *stbi_jpeg_load(char const *filename, int *x, int *y, int *comp, int req_comp)
{
   unsigned char *pixels;
   FILE *fp = fopen(filename, "rb");
   if (fp == NULL) return NULL;
   pixels = stbi_jpeg_load_from_file(fp,x,y,comp,req_comp);
   fclose(fp);
   return pixels;
}
1567#endif
1568
1569unsigned char *stbi_jpeg_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
1570{
1571 jpeg j;
1572 start_mem(&j.s, buffer,len);
1573 return load_jpeg_image(&j, x,y,comp,req_comp);
1574}
1575
1576#ifndef STBI_NO_STDIO
1577int stbi_jpeg_test_file(FILE *f)
1578{
1579 int n,r;
1580 jpeg j;
1581 n = ftell(f);
1582 start_file(&j.s, f);
1583 r = decode_jpeg_header(&j, SCAN_type);
1584 fseek(f,n,SEEK_SET0);
1585 return r;
1586}
1587#endif
1588
1589int stbi_jpeg_test_memory(stbi_uc const *buffer, int len)
1590{
1591 jpeg j;
1592 start_mem(&j.s, buffer,len);
1593 return decode_jpeg_header(&j, SCAN_type);
1594}
1595
1596// @TODO:
1597#ifndef STBI_NO_STDIO
1598extern int stbi_jpeg_info (char const *filename, int *x, int *y, int *comp);
1599extern int stbi_jpeg_info_from_file (FILE *f, int *x, int *y, int *comp);
1600#endif
1601extern int stbi_jpeg_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
1602
1603// public domain zlib decode v0.2 Sean Barrett 2006-11-18
1604// simple implementation
1605// - all input must be provided in an upfront buffer
1606// - all output is written to a single output buffer (can malloc/realloc)
1607// performance
1608// - fast huffman
1609
// the fast path is cheaper to check than jpeg's huffman fast path, but the
// slow path is slower
#define ZFAST_BITS  9 // accelerate all cases in default tables
// mask selecting the low ZFAST_BITS bits
#define ZFAST_MASK  ((1 << ZFAST_BITS) - 1)
1613
1614// zlib-style huffman encoding
1615// (jpegs packs from left, zlib from right, so can't share code)
1616typedef struct
1617{
1618 uint16 fast[1 << ZFAST_BITS9];
1619 uint16 firstcode[16];
1620 int maxcode[17];
1621 uint16 firstsymbol[16];
1622 uint8 size[288];
1623 uint16 value[288];
1624} zhuffman;
1625
1626__forceinline static int bitreverse16(int n)
1627{
1628 n = ((n & 0xAAAA) >> 1) | ((n & 0x5555) << 1);
1629 n = ((n & 0xCCCC) >> 2) | ((n & 0x3333) << 2);
1630 n = ((n & 0xF0F0) >> 4) | ((n & 0x0F0F) << 4);
1631 n = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8);
1632 return n;
1633}
1634
1635__forceinline static int bit_reverse(int v, int bits)
1636{
1637 assert(bits <= 16)((void) (0));
1638 // to bit reverse n bits, reverse 16 and shift
1639 // e.g. 11 bits, bit reverse and shift away 5
1640 return bitreverse16(v) >> (16-bits);
1641}
1642
1643static int zbuild_huffman(zhuffman *z, uint8 *sizelist, int num)
1644{
1645 int i,k=0;
1646 int code, next_code[16], sizes[17];
1647
1648 // DEFLATE spec for generating codes
1649 memset(sizes, 0, sizeof(sizes));
1650 memset(z->fast, 255, sizeof(z->fast));
1651 for (i=0; i < num; ++i)
1652 ++sizes[sizelist[i]];
1653 sizes[0] = 0;
1654 for (i=1; i < 16; ++i)
1655 assert(sizes[i] <= (1 << i))((void) (0));
1656 code = 0;
1657 for (i=1; i < 16; ++i) {
1658 next_code[i] = code;
1659 z->firstcode[i] = (uint16) code;
1660 z->firstsymbol[i] = (uint16) k;
1661 code = (code + sizes[i]);
1662 if (sizes[i])
1663 if (code-1 >= (1 << i)) return e("bad codelengths","Corrupt JPEG")e("Corrupt JPEG");
1664 z->maxcode[i] = code << (16-i); // preshift for inner loop
1665 code <<= 1;
1666 k += sizes[i];
1667 }
1668 z->maxcode[16] = 0x10000; // sentinel
1669 for (i=0; i < num; ++i) {
1670 int s = sizelist[i];
1671 if (s) {
1672 int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s];
1673 z->size[c] = (uint8)s;
1674 z->value[c] = (uint16)i;
1675 if (s <= ZFAST_BITS9) {
1676 int k2 = bit_reverse(next_code[s],s);
1677 while (k2 < (1 << ZFAST_BITS9)) {
1678 z->fast[k2] = (uint16) c;
1679 k2 += (1 << s);
1680 }
1681 }
1682 ++next_code[s];
1683 }
1684 }
1685 return 1;
1686}
1687
1688// zlib-from-memory implementation for PNG reading
1689// because PNG allows splitting the zlib stream arbitrarily,
1690// and it's annoying structurally to have PNG call ZLIB call PNG,
1691// we require PNG read all the IDATs and combine them into a single
1692// memory buffer
1693
// Decoder state for one zlib/DEFLATE stream held entirely in memory.
1694typedef struct
1695{
// input cursor and one-past-the-end of the compressed data (see zget8)
1696 uint8 *zbuffer, *zbuffer_end;
// number of valid bits currently held in code_buffer
1697 int num_bits;
// bit accumulator; new input bytes are OR-ed in above the valid bits (see fill_bits)
1698 uint32 code_buffer;
1699
// output write cursor
1700 char *zout;
// start of the output buffer
1701 char *zout_start;
// one past the end of the output buffer
1702 char *zout_end;
// non-zero if expand() is allowed to realloc the output buffer
1703 int z_expandable;
1704
// Huffman tables for literal/length symbols and for distance symbols
1705 zhuffman z_length, z_distance;
1706} zbuf;
1707
1708__forceinline static int zget8(zbuf *z)
1709{
1710 if (z->zbuffer >= z->zbuffer_end) return 0;
1711 return *z->zbuffer++;
1712}
1713
1714static void fill_bits(zbuf *z)
1715{
1716 do {
1717 assert(z->code_buffer < (1U << z->num_bits))((void) (0));
1718 z->code_buffer |= zget8(z) << z->num_bits;
1719 z->num_bits += 8;
1720 } while (z->num_bits <= 24);
1721}
1722
1723__forceinline static unsigned int zreceive(zbuf *z, int n)
1724{
1725 unsigned int k;
1726 if (z->num_bits < n) fill_bits(z);
1727 k = z->code_buffer & ((1 << n) - 1);
1728 z->code_buffer >>= n;
1729 z->num_bits -= n;
1730 return k;
1731}
1732
1733__forceinline static int zhuffman_decode(zbuf *a, zhuffman *z)
1734{
1735 int b,s,k;
1736 if (a->num_bits < 16) fill_bits(a);
1737 b = z->fast[a->code_buffer & ZFAST_MASK((1 << 9) - 1)];
1738 if (b < 0xffff) {
1739 s = z->size[b];
1740 a->code_buffer >>= s;
1741 a->num_bits -= s;
1742 return z->value[b];
1743 }
1744
1745 // not resolved by fast table, so compute it the slow way
1746 // use jpeg approach, which requires MSbits at top
1747 k = bit_reverse(a->code_buffer, 16);
1748 for (s=ZFAST_BITS9+1; ; ++s)
1749 if (k < z->maxcode[s])
1750 break;
1751 if (s == 16) return -1; // invalid code!
1752 // code size is s, so:
1753 b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s];
1754 assert(z->size[b] == s)((void) (0));
1755 a->code_buffer >>= s;
1756 a->num_bits -= s;
1757 return z->value[b];
1758}
1759
1760static int expand(zbuf *z, int n) // need to make room for n bytes
1761{
1762 char *q;
1763 int cur, limit;
1764 if (!z->z_expandable) return e("output buffer limit","Corrupt PNG")e("Corrupt PNG");
1765 cur = (int) (z->zout - z->zout_start);
1766 limit = (int) (z->zout_end - z->zout_start);
1767 while (cur + n > limit)
1768 limit *= 2;
1769 q = (char *) realloc(z->zout_start, limit);
1770 if (q == NULL((void*)0)) return e("outofmem", "Out of memory")e("Out of memory");
1771 z->zout_start = q;
1772 z->zout = q + cur;
1773 z->zout_end = q + limit;
1774 return 1;
1775}
1776
// Base match length for each length symbol; parse_huffman_block indexes
// this with (symbol - 257). The trailing 0 entries pad the table.
1777static int length_base[31] = {
1778 3,4,5,6,7,8,9,10,11,13,
1779 15,17,19,23,27,31,35,43,51,59,
1780 67,83,99,115,131,163,195,227,258,0,0 };
1781
// Number of extra bits read after each length symbol and added to the base.
1782static int length_extra[31]=
1783{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 };
1784
// Base match distance for each distance symbol; the last two entries are 0.
1785static int dist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,
1786257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0};
1787
// Number of extra bits read after each distance symbol. Only 30 values
// are listed, so the remaining two entries are zero-initialized.
1788static int dist_extra[32] =
1789{ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
1790
1791static int parse_huffman_block(zbuf *a)
1792{
1793 for(;;) {
1794 int z = zhuffman_decode(a, &a->z_length);
1795 if (z < 256) {
1796 if (z < 0) return e("bad huffman code","Corrupt PNG")e("Corrupt PNG"); // error in huffman codes
1797 if (a->zout >= a->zout_end) if (!expand(a, 1)) return 0;
1798 *a->zout++ = (char) z;
1799 } else {
1800 uint8 *p;
1801 int len,dist;
1802 if (z == 256) return 1;
1803 z -= 257;
1804 len = length_base[z];
1805 if (length_extra[z]) len += zreceive(a, length_extra[z]);
1806 z = zhuffman_decode(a, &a->z_distance);
1807 if (z < 0) return e("bad huffman code","Corrupt PNG")e("Corrupt PNG");
1808 dist = dist_base[z];
1809 if (dist_extra[z]) dist += zreceive(a, dist_extra[z]);
1810 if (a->zout - a->zout_start < dist) return e("bad dist","Corrupt PNG")e("Corrupt PNG");
1811 if (a->zout + len > a->zout_end) if (!expand(a, len)) return 0;
1812 p = (uint8 *) (a->zout - dist);
1813 while (len--)
1814 *a->zout++ = *p++;
1815 }
1816 }
1817}
1818
1819static int compute_huffman_codes(zbuf *a)
1820{
1821 static uint8 length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 };
1822 zhuffman z_codelength;
1823 uint8 lencodes[286+32+137];//padding for maximum single op
1824 uint8 codelength_sizes[19];
1825 int i,n;
1826
1827 int hlit = zreceive(a,5) + 257;
1828 int hdist = zreceive(a,5) + 1;
1829 int hclen = zreceive(a,4) + 4;
1830
1831 memset(codelength_sizes, 0, sizeof(codelength_sizes));
1832 for (i=0; i < hclen; ++i) {
1833 int s = zreceive(a,3);
1834 codelength_sizes[length_dezigzag[i]] = (uint8) s;
1835 }
1836 if (!zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0;
1837
1838 n = 0;
1839 while (n < hlit + hdist) {
1840 int c = zhuffman_decode(a, &z_codelength);
1841 assert(c >= 0 && c < 19)((void) (0));
1842 if (c < 16)
1843 lencodes[n++] = (uint8) c;
1844 else if (c == 16) {
1845 c = zreceive(a,2)+3;
1846 memset(lencodes+n, lencodes[n-1], c);
1847 n += c;
1848 } else if (c == 17) {
1849 c = zreceive(a,3)+3;
1850 memset(lencodes+n, 0, c);
1851 n += c;
1852 } else {
1853 assert(c == 18)((void) (0));
1854 c = zreceive(a,7)+11;
1855 memset(lencodes+n, 0, c);
1856 n += c;
1857 }
1858 }
1859 if (n != hlit+hdist) return e("bad codelengths","Corrupt PNG")e("Corrupt PNG");
1860 if (!zbuild_huffman(&a->z_length, lencodes, hlit)) return 0;
1861 if (!zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0;
1862 return 1;
1863}
1864
1865static int parse_uncompressed_block(zbuf *a)
1866{
1867 uint8 header[4];
1868 int len,nlen,k;
1869 if (a->num_bits & 7)
1870 zreceive(a, a->num_bits & 7); // discard
1871 // drain the bit-packed data into header
1872 k = 0;
1873 while (a->num_bits > 0) {
1874 header[k++] = (uint8) (a->code_buffer & 255); // wtf this warns?
1875 a->code_buffer >>= 8;
1876 a->num_bits -= 8;
1877 }
1878 assert(a->num_bits == 0)((void) (0));
1879 // now fill header the normal way
1880 while (k < 4)
1881 header[k++] = (uint8) zget8(a);
1882 len = header[1] * 256 + header[0];
1883 nlen = header[3] * 256 + header[2];
1884 if (nlen != (len ^ 0xffff)) return e("zlib corrupt","Corrupt PNG")e("Corrupt PNG");
1885 if (a->zbuffer + len > a->zbuffer_end) return e("read past buffer","Corrupt PNG")e("Corrupt PNG");
1886 if (a->zout + len > a->zout_end)
1887 if (!expand(a, len)) return 0;
1888 memcpy(a->zout, a->zbuffer, len);
1889 a->zbuffer += len;
1890 a->zout += len;
1891 return 1;
1892}
1893
1894static int parse_zlib_header(zbuf *a)
1895{
1896 int cmf = zget8(a);
1897 int cm = cmf & 15;
1898 /* int cinfo = cmf >> 4; */
1899 int flg = zget8(a);
1900 if ((cmf*256+flg) % 31 != 0) return e("bad zlib header","Corrupt PNG")e("Corrupt PNG"); // zlib spec
1901 if (flg & 32) return e("no preset dict","Corrupt PNG")e("Corrupt PNG"); // preset dictionary not allowed in png
1902 if (cm != 8) return e("bad compression","Corrupt PNG")e("Corrupt PNG"); // DEFLATE required for png
1903 // window = 1 << (8 + cinfo)... but who cares, we fully buffer output
1904 return 1;
1905}
1906
1907// @TODO: should statically initialize these for optimal thread safety
1908static uint8 default_length[288], default_distance[32];
1909static void init_defaults(void)
1910{
1911 int i; // use <= to match clearly with spec
1912 for (i=0; i <= 143; ++i) default_length[i] = 8;
1913 for ( ; i <= 255; ++i) default_length[i] = 9;
1914 for ( ; i <= 279; ++i) default_length[i] = 7;
1915 for ( ; i <= 287; ++i) default_length[i] = 8;
1916
1917 for (i=0; i <= 31; ++i) default_distance[i] = 5;
1918}
1919
1920int stbi_png_partial; // a quick hack to only allow decoding some of a PNG... I should implement real streaming support instead
1921static int parse_zlib(zbuf *a, int parse_header)
1922{
1923 int final, type;
1924 if (parse_header)
1925 if (!parse_zlib_header(a)) return 0;
1926 a->num_bits = 0;
1927 a->code_buffer = 0;
1928 do {
1929 final = zreceive(a,1);
1930 type = zreceive(a,2);
1931 if (type == 0) {
1932 if (!parse_uncompressed_block(a)) return 0;
1933 } else if (type == 3) {
1934 return 0;
1935 } else {
1936 if (type == 1) {
1937 // use fixed code lengths
1938 if (!default_distance[31]) init_defaults();
1939 if (!zbuild_huffman(&a->z_length , default_length , 288)) return 0;
1940 if (!zbuild_huffman(&a->z_distance, default_distance, 32)) return 0;
1941 } else {
1942 if (!compute_huffman_codes(a)) return 0;
1943 }
1944 if (!parse_huffman_block(a)) return 0;
1945 }
1946 if (stbi_png_partial && a->zout - a->zout_start > 65536)
1947 break;
1948 } while (!final);
1949 return 1;
1950}
1951
1952static int do_zlib(zbuf *a, char *obuf, int olen, int expandable, int parse_header)
1953{
1954 a->zout_start = obuf;
1955 a->zout = obuf;
1956 a->zout_end = obuf + olen;
1957 a->z_expandable = expandable;
1958
1959 return parse_zlib(a, parse_header);
1960}
1961
1962char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen)
1963{
1964 zbuf a;
1965 char *p = (char *) malloc(initial_size);
1966 if (p == NULL((void*)0)) return NULL((void*)0);
1967 a.zbuffer = (uint8 *) buffer;
1968 a.zbuffer_end = (uint8 *) buffer + len;
1969 if (do_zlib(&a, p, initial_size, 1, 1)) {
1970 if (outlen) *outlen = (int) (a.zout - a.zout_start);
1971 return a.zout_start;
1972 } else {
1973 free(a.zout_start);
1974 return NULL((void*)0);
1975 }
1976}
1977
// Same as stbi_zlib_decode_malloc_guesssize with a 16384-byte initial
// output buffer.
char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen)
{
   const int initial_guess = 16384;
   return stbi_zlib_decode_malloc_guesssize(buffer, len, initial_guess, outlen);
}
1982
1983int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen)
1984{
1985 zbuf a;
1986 a.zbuffer = (uint8 *) ibuffer;
1987 a.zbuffer_end = (uint8 *) ibuffer + ilen;
1988 if (do_zlib(&a, obuffer, olen, 0, 1))
1989 return (int) (a.zout - a.zout_start);
1990 else
1991 return -1;
1992}
1993
1994char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen)
1995{
1996 zbuf a;
1997 char *p = (char *) malloc(16384);
1998 if (p == NULL((void*)0)) return NULL((void*)0);
1999 a.zbuffer = (uint8 *) buffer;
2000 a.zbuffer_end = (uint8 *) buffer+len;
2001 if (do_zlib(&a, p, 16384, 1, 0)) {
2002 if (outlen) *outlen = (int) (a.zout - a.zout_start);
2003 return a.zout_start;
2004 } else {
2005 free(a.zout_start);
2006 return NULL((void*)0);
2007 }
2008}
2009
2010int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
2011{
2012 zbuf a;
2013 a.zbuffer = (uint8 *) ibuffer;
2014 a.zbuffer_end = (uint8 *) ibuffer + ilen;
2015 if (do_zlib(&a, obuffer, olen, 0, 0))
2016 return (int) (a.zout - a.zout_start);
2017 else
2018 return -1;
2019}
2020
2021// public domain "baseline" PNG decoder v0.10 Sean Barrett 2006-11-18
2022// simple implementation
2023// - only 8-bit samples
2024// - no CRC checking
2025// - allocates lots of intermediate memory
2026// - avoids problem of streaming data between subsystems
2027// - avoids explicit window management
2028// performance
2029// - uses stb_zlib, a PD zlib implementation with fast huffman decoding
2030
2031
// Header of one PNG chunk as returned by get_chunk_header.
2032typedef struct
2033{
// first 32-bit value read from the chunk header
2034 uint32 length;
// second 32-bit value; compared against PNG_TYPE(...) codes by parse_png_file
2035 uint32 type;
2036} chunk;
2037
// Pack four chunk-name characters into one 32-bit code with the first
// character in the most significant byte, matching chunk.type.
#define PNG_TYPE(a,b,c,d)   \
   (((a) << 24) + ((b) << 16) + ((c) << 8) + (d))
2039
2040static chunk get_chunk_header(stbi *s)
2041{
2042 chunk c;
2043 c.length = get32(s);
2044 c.type = get32(s);
2045 return c;
2046}
2047
2048static int check_png_header(stbi *s)
2049{
2050 static uint8 png_sig[8] = { 137,80,78,71,13,10,26,10 };
2051 int i;
2052 for (i=0; i < 8; ++i)
2053 if (get8(s) != png_sig[i]) return e("bad png sig","Not a PNG")e("Not a PNG");
2054 return 1;
2055}
2056
// State for decoding one PNG.
2057typedef struct
2058{
// input stream plus image dimensions and channel counts
2059 stbi s;
// idata: concatenated IDAT payload; expanded: zlib-decoded data;
// out: decoded pixels. do_png frees whichever of these are still set.
2060 uint8 *idata, *expanded, *out;
2061} png;
2062
2063
// Filter codes. Values 0..4 are the filter byte read from the data;
// F_avg_first / F_paeth_first are internal variants used for the first row.
2064enum {
2065 F_none=0, F_sub=1, F_up=2, F_avg=3, F_paeth=4,
2066 F_avg_first, F_paeth_first,
2067};
2068
// Maps a filter code 0..4 to the variant used on row 0, which does not
// sample a previous row (see create_png_image_raw).
2069static uint8 first_row_filter[5] =
2070{
2071 F_none, F_sub, F_none, F_avg_first, F_paeth_first
2072};
2073
// Paeth predictor: of a, b and c, return the one closest to a + b - c.
// Ties are broken in the order a, then b, then c.
static int paeth(int a, int b, int c)
{
   int estimate = a + b - c;
   int dist_a = abs(estimate - a);
   int dist_b = abs(estimate - b);
   int dist_c = abs(estimate - c);
   if (dist_a > dist_b || dist_a > dist_c)
      return (dist_b <= dist_c) ? b : c;
   return a;
}
2084
2085// create the png data from post-deflated data
// Un-filter zlib-decoded scanlines in 'raw' into a newly malloc'd a->out.
//   raw, raw_len - decoded data: each row is one filter byte followed by
//                  x pixels of s->img_n bytes
//   out_n        - bytes per output pixel: img_n, or img_n+1 in which case
//                  the extra byte of every pixel is set to 255
//   x, y         - dimensions of this (sub)image
// Returns 1 on success; on error records it via e() and returns the result.
// a->out is not freed here on the error returns that follow the allocation.
2086static int create_png_image_raw(png *a, uint8 *raw, uint32 raw_len, int out_n, uint32 x, uint32 y)
2087{
2088 stbi *s = &a->s;
2089 uint32 i,j,stride = x*out_n;
2090 int k;
2091 int img_n = s->img_n; // copy it into a local for later
2092 assert(out_n == s->img_n || out_n == s->img_n+1)((void) (0));
// partial mode decodes only the first row
2093 if (stbi_png_partial) y = 1;
2094 a->out = (uint8 *) malloc(x * y * out_n);
2095 if (!a->out) return e("outofmem", "Out of memory")e("Out of memory");
// raw_len is validated only when not in partial mode: an exact match is
// required when x,y equal the full image size, otherwise at least enough
// bytes for this pass
2096 if (!stbi_png_partial) {
2097 if (s->img_x == x && s->img_y == y) {
2098 if (raw_len != (img_n * x + 1) * y)
2099 return e("not enough pixels","Corrupt PNG")e("Corrupt PNG");
2100 } else { // interlaced:
2101 if (raw_len < (img_n * x + 1) * y)
2102 return e("not enough pixels","Corrupt PNG")e("Corrupt PNG");
2103 }
2104 }
2105 for (j=0; j < y; ++j) {
2106 uint8 *cur = a->out + stride*j;
// for j == 0 this points before a->out; the first-row filter variants
// selected below never read through it
2107 uint8 *prior = cur - stride;
2108 int filter = *raw++;
2109 if (filter > 4) return e("invalid filter","Corrupt PNG")e("Corrupt PNG");
2110 // if first row, use special filter that doesn't sample previous row
2111 if (j == 0) filter = first_row_filter[filter];
2112 // handle first pixel explicitly
// (no pixel to the left, so only the raw value and the row above are used)
2113 for (k=0; k < img_n; ++k) {
2114 switch(filter) {
2115 case F_none : cur[k] = raw[k]; break;
2116 case F_sub : cur[k] = raw[k]; break;
2117 case F_up : cur[k] = raw[k] + prior[k]; break;
2118 case F_avg : cur[k] = raw[k] + (prior[k]>>1); break;
2119 case F_paeth : cur[k] = (uint8) (raw[k] + paeth(0,prior[k],0)); break;
2120 case F_avg_first : cur[k] = raw[k]; break;
2121 case F_paeth_first: cur[k] = raw[k]; break;
2122 }
2123 }
// fill the added channel of the first pixel
2124 if (img_n != out_n) cur[img_n] = 255;
2125 raw += img_n;
2126 cur += out_n;
2127 prior += out_n;
2128 // this is a little gross, so that we don't switch per-pixel or per-component
2129 if (img_n == out_n) {
// CASE(f) expands to "case f:" followed by a loop over the remaining x-1
// pixels and their components; the statement after CASE(...) is the
// per-component body
2130 #define CASE(f) \
2131 case f: \
2132 for (i=x-1; i >= 1; --i, raw+=img_n,cur+=img_n,prior+=img_n) \
2133 for (k=0; k < img_n; ++k)
2134 switch(filter) {
2135 CASE(F_none) cur[k] = raw[k]; break;
2136 CASE(F_sub) cur[k] = raw[k] + cur[k-img_n]; break;
2137 CASE(F_up) cur[k] = raw[k] + prior[k]; break;
2138 CASE(F_avg) cur[k] = raw[k] + ((prior[k] + cur[k-img_n])>>1); break;
2139 CASE(F_paeth) cur[k] = (uint8) (raw[k] + paeth(cur[k-img_n],prior[k],prior[k-img_n])); break;
2140 CASE(F_avg_first) cur[k] = raw[k] + (cur[k-img_n] >> 1); break;
2141 CASE(F_paeth_first) cur[k] = (uint8) (raw[k] + paeth(cur[k-img_n],0,0)); break;
2142 }
2143 #undef CASE
2144 } else {
2145 assert(img_n+1 == out_n)((void) (0));
// same as above, but output pixels are out_n bytes wide and the loop step
// also writes 255 into the extra channel of each pixel
2146 #define CASE(f) \
2147 case f: \
2148 for (i=x-1; i >= 1; --i, cur[img_n]=255,raw+=img_n,cur+=out_n,prior+=out_n) \
2149 for (k=0; k < img_n; ++k)
2150 switch(filter) {
2151 CASE(F_none) cur[k] = raw[k]; break;
2152 CASE(F_sub) cur[k] = raw[k] + cur[k-out_n]; break;
2153 CASE(F_up) cur[k] = raw[k] + prior[k]; break;
2154 CASE(F_avg) cur[k] = raw[k] + ((prior[k] + cur[k-out_n])>>1); break;
2155 CASE(F_paeth) cur[k] = (uint8) (raw[k] + paeth(cur[k-out_n],prior[k],prior[k-out_n])); break;
2156 CASE(F_avg_first) cur[k] = raw[k] + (cur[k-out_n] >> 1); break;
2157 CASE(F_paeth_first) cur[k] = (uint8) (raw[k] + paeth(cur[k-out_n],0,0)); break;
2158 }
2159 #undef CASE
2160 }
2161 }
2162 return 1;
2163}
2164
2165static int create_png_image(png *a, uint8 *raw, uint32 raw_len, int out_n, int interlaced)
2166{
2167 uint8 *final;
2168 int p;
2169 int save;
2170 if (!interlaced)
2171 return create_png_image_raw(a, raw, raw_len, out_n, a->s.img_x, a->s.img_y);
2172 save = stbi_png_partial;
2173 stbi_png_partial = 0;
2174
2175 // de-interlacing
2176 final = (uint8 *) malloc(a->s.img_x * a->s.img_y * out_n);
2177 for (p=0; p < 7; ++p) {
2178 int xorig[] = { 0,4,0,2,0,1,0 };
2179 int yorig[] = { 0,0,4,0,2,0,1 };
2180 int xspc[] = { 8,8,4,4,2,2,1 };
2181 int yspc[] = { 8,8,8,4,4,2,2 };
2182 int i,j,x,y;
2183 // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1
2184 x = (a->s.img_x - xorig[p] + xspc[p]-1) / xspc[p];
2185 y = (a->s.img_y - yorig[p] + yspc[p]-1) / yspc[p];
2186 if (x && y) {
2187 if (!create_png_image_raw(a, raw, raw_len, out_n, x, y)) {
2188 free(final);
2189 return 0;
2190 }
2191 for (j=0; j < y; ++j)
2192 for (i=0; i < x; ++i)
2193 memcpy(final + (j*yspc[p]+yorig[p])*a->s.img_x*out_n + (i*xspc[p]+xorig[p])*out_n,
2194 a->out + (j*x+i)*out_n, out_n);
2195 free(a->out);
2196 raw += (x*out_n+1)*y;
2197 raw_len -= (x*out_n+1)*y;
2198 }
2199 }
2200 a->out = final;
2201
2202 stbi_png_partial = save;
2203 return 1;
2204}
2205
2206static int compute_transparency(png *z, uint8 tc[3], int out_n)
2207{
2208 stbi *s = &z->s;
2209 uint32 i, pixel_count = s->img_x * s->img_y;
2210 uint8 *p = z->out;
2211
2212 // compute color-based transparency, assuming we've
2213 // already got 255 as the alpha value in the output
2214 assert(out_n == 2 || out_n == 4)((void) (0));
2215
2216 if (out_n == 2) {
2217 for (i=0; i < pixel_count; ++i) {
2218 p[1] = (p[0] == tc[0] ? 0 : 255);
2219 p += 2;
2220 }
2221 } else {
2222 for (i=0; i < pixel_count; ++i) {
2223 if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
2224 p[3] = 0;
2225 p += 4;
2226 }
2227 }
2228 return 1;
2229}
2230
2231static int expand_palette(png *a, uint8 *palette, int len, int pal_img_n)
2232{
2233 uint32 i, pixel_count = a->s.img_x * a->s.img_y;
2234 uint8 *p, *temp_out, *orig = a->out;
2235
2236 p = (uint8 *) malloc(pixel_count * pal_img_n);
2237 if (p == NULL((void*)0)) return e("outofmem", "Out of memory")e("Out of memory");
2238
2239 // between here and free(out) below, exitting would leak
2240 temp_out = p;
2241
2242 if (pal_img_n == 3) {
2243 for (i=0; i < pixel_count; ++i) {
2244 int n = orig[i]*4;
2245 p[0] = palette[n ];
2246 p[1] = palette[n+1];
2247 p[2] = palette[n+2];
2248 p += 3;
2249 }
2250 } else {
2251 for (i=0; i < pixel_count; ++i) {
2252 int n = orig[i]*4;
2253 p[0] = palette[n ];
2254 p[1] = palette[n+1];
2255 p[2] = palette[n+2];
2256 p[3] = palette[n+3];
2257 p += 4;
2258 }
2259 }
2260 free(a->out);
2261 a->out = temp_out;
2262 return 1;
2263}
2264
2265static int parse_png_file(png *z, int scan, int req_comp)
2266{
2267 uint8 palette[1024], pal_img_n=0;
2268 uint8 has_trans=0, tc[3];
2269 uint32 ioff=0, idata_limit=0, i, pal_len=0;
2270 int first=1,k,interlace=0;
2271 stbi *s = &z->s;
2272
2273 if (!check_png_header(s)) return 0;
2274
2275 if (scan == SCAN_type) return 1;
2276
2277 for(;;first=0) {
2278 chunk c = get_chunk_header(s);
2279 if (first && c.type != PNG_TYPE('I','H','D','R')((('I') << 24) + (('H') << 16) + (('D') << 8
) + ('R'))
)
2280 return e("first not IHDR","Corrupt PNG")e("Corrupt PNG");
2281 switch (c.type) {
2282 case PNG_TYPE('I','H','D','R')((('I') << 24) + (('H') << 16) + (('D') << 8
) + ('R'))
: {
2283 int depth,color,comp,filter;
2284 if (!first) return e("multiple IHDR","Corrupt PNG")e("Corrupt PNG");
2285 if (c.length != 13) return e("bad IHDR len","Corrupt PNG")e("Corrupt PNG");
2286 s->img_x = get32(s); if (s->img_x > (1 << 24)) return e("too large","Very large image (corrupt?)")e("Very large image (corrupt?)");
2287 s->img_y = get32(s); if (s->img_y > (1 << 24)) return e("too large","Very large image (corrupt?)")e("Very large image (corrupt?)");
2288 depth = get8(s); if (depth != 8) return e("8bit only","PNG not supported: 8-bit only")e("PNG not supported: 8-bit only");
2289 color = get8(s); if (color > 6) return e("bad ctype","Corrupt PNG")e("Corrupt PNG");
2290 if (color == 3) pal_img_n = 3; else if (color & 1) return e("bad ctype","Corrupt PNG")e("Corrupt PNG");
2291 comp = get8(s); if (comp) return e("bad comp method","Corrupt PNG")e("Corrupt PNG");
2292 filter= get8(s); if (filter) return e("bad filter method","Corrupt PNG")e("Corrupt PNG");
2293 interlace = get8(s); if (interlace>1) return e("bad interlace method","Corrupt PNG")e("Corrupt PNG");
2294 if (!s->img_x || !s->img_y) return e("0-pixel image","Corrupt PNG")e("Corrupt PNG");
2295 if (!pal_img_n) {
2296 s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
2297 if ((1 << 30) / s->img_x / s->img_n < s->img_y) return e("too large", "Image too large to decode")e("Image too large to decode");
2298 if (scan == SCAN_header) return 1;
2299 } else {
2300 // if paletted, then pal_n is our final components, and
2301 // img_n is # components to decompress/filter.
2302 s->img_n = 1;
2303 if ((1 << 30) / s->img_x / 4 < s->img_y) return e("too large","Corrupt PNG")e("Corrupt PNG");
2304 // if SCAN_header, have to scan to see if we have a tRNS
2305 }
2306 break;
2307 }
2308
2309 case PNG_TYPE('P','L','T','E')((('P') << 24) + (('L') << 16) + (('T') << 8
) + ('E'))
: {
2310 if (c.length > 256*3) return e("invalid PLTE","Corrupt PNG")e("Corrupt PNG");
2311 pal_len = c.length / 3;
2312 if (pal_len * 3 != c.length) return e("invalid PLTE","Corrupt PNG")e("Corrupt PNG");
2313 for (i=0; i < pal_len; ++i) {
2314 palette[i*4+0] = get8u(s);
2315 palette[i*4+1] = get8u(s);
2316 palette[i*4+2] = get8u(s);
2317 palette[i*4+3] = 255;
2318 }
2319 break;
2320 }
2321
2322 case PNG_TYPE('t','R','N','S')((('t') << 24) + (('R') << 16) + (('N') << 8
) + ('S'))
: {
2323 if (z->idata) return e("tRNS after IDAT","Corrupt PNG")e("Corrupt PNG");
2324 if (pal_img_n) {
2325 if (scan == SCAN_header) { s->img_n = 4; return 1; }
2326 if (pal_len == 0) return e("tRNS before PLTE","Corrupt PNG")e("Corrupt PNG");
2327 if (c.length > pal_len) return e("bad tRNS len","Corrupt PNG")e("Corrupt PNG");
2328 pal_img_n = 4;
2329 for (i=0; i < c.length; ++i)
2330 palette[i*4+3] = get8u(s);
2331 } else {
2332 if (!(s->img_n & 1)) return e("tRNS with alpha","Corrupt PNG")e("Corrupt PNG");
2333 if (c.length != (uint32) s->img_n*2) return e("bad tRNS len","Corrupt PNG")e("Corrupt PNG");
2334 has_trans = 1;
2335 for (k=0; k < s->img_n; ++k)
2336 tc[k] = (uint8) get16(s); // non 8-bit images will be larger
2337 }
2338 break;
2339 }
2340
2341 case PNG_TYPE('I','D','A','T')((('I') << 24) + (('D') << 16) + (('A') << 8
) + ('T'))
: {
2342 if (pal_img_n && !pal_len) return e("no PLTE","Corrupt PNG")e("Corrupt PNG");
2343 if (scan == SCAN_header) { s->img_n = pal_img_n; return 1; }
2344 if (ioff + c.length > idata_limit) {
2345 uint8 *p;
2346 if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096;
2347 while (ioff + c.length > idata_limit)
2348 idata_limit *= 2;
2349 p = (uint8 *) realloc(z->idata, idata_limit); if (p == NULL((void*)0)) return e("outofmem", "Out of memory")e("Out of memory");
2350 z->idata = p;
2351 }
2352 #ifndef STBI_NO_STDIO
2353 if (s->img_file)
2354 {
2355 if (fread(z->idata+ioff,1,c.length,s->img_file) != c.length) return e("outofdata","Corrupt PNG")e("Corrupt PNG");
2356 }
2357 else
2358 #endif
2359 {
2360 memcpy(z->idata+ioff, s->img_buffer, c.length);
2361 s->img_buffer += c.length;
2362 }
2363 ioff += c.length;
2364 break;
2365 }
2366
2367 case PNG_TYPE('I','E','N','D')((('I') << 24) + (('E') << 16) + (('N') << 8
) + ('D'))
: {
2368 uint32 raw_len;
2369 if (scan != SCAN_load) return 1;
2370 if (z->idata == NULL((void*)0)) return e("no IDAT","Corrupt PNG")e("Corrupt PNG");
2371 z->expanded = (uint8 *) stbi_zlib_decode_malloc((char *) z->idata, ioff, (int *) &raw_len);
2372 if (z->expanded == NULL((void*)0)) return 0; // zlib should set error
2373 free(z->idata); z->idata = NULL((void*)0);
2374 if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans)
2375 s->img_out_n = s->img_n+1;
2376 else
2377 s->img_out_n = s->img_n;
2378 if (!create_png_image(z, z->expanded, raw_len, s->img_out_n, interlace)) return 0;
2379 if (has_trans)
2380 if (!compute_transparency(z, tc, s->img_out_n)) return 0;
2381 if (pal_img_n) {
2382 // pal_img_n == 3 or 4
2383 s->img_n = pal_img_n; // record the actual colors we had
2384 s->img_out_n = pal_img_n;
2385 if (req_comp >= 3) s->img_out_n = req_comp;
2386 if (!expand_palette(z, palette, pal_len, s->img_out_n))
2387 return 0;
2388 }
2389 free(z->expanded); z->expanded = NULL((void*)0);
2390 return 1;
2391 }
2392
2393 default:
2394 // if critical, fail
2395 if ((c.type & (1 << 29)) == 0) {
2396 #ifndef STBI_NO_FAILURE_STRINGS
2397 #ifndef STBI_FAILURE_USERMSG1
2398 // not threadsafe
2399 static char invalid_chunk[] = "XXXX chunk not known";
2400 invalid_chunk[0] = (uint8) (c.type >> 24);
2401 invalid_chunk[1] = (uint8) (c.type >> 16);
2402 invalid_chunk[2] = (uint8) (c.type >> 8);
2403 invalid_chunk[3] = (uint8) (c.type >> 0);
2404 #endif
2405 #endif
2406 return e(invalid_chunk, "PNG not supported: unknown chunk type")e("PNG not supported: unknown chunk type");
2407 }
2408 skip(s, c.length);
2409 break;
2410 }
2411 // end of chunk, read and skip CRC
2412 get32(s);
2413 }
2414}
2415
2416static unsigned char *do_png(png *p, int *x, int *y, int *n, int req_comp)
2417{
2418 unsigned char *result=NULL((void*)0);
2419 p->expanded = NULL((void*)0);
2420 p->idata = NULL((void*)0);
2421 p->out = NULL((void*)0);
2422 if (req_comp < 0 || req_comp > 4) return epuc("bad req_comp", "Internal error")((unsigned char *) (e("Internal error")?((void*)0):((void*)0)
))
;
2423 if (parse_png_file(p, SCAN_load, req_comp)) {
2424 result = p->out;
2425 p->out = NULL((void*)0);
2426 if (req_comp && req_comp != p->s.img_out_n) {
2427 result = convert_format(result, p->s.img_out_n, req_comp, p->s.img_x, p->s.img_y);
2428 p->s.img_out_n = req_comp;
2429 if (result == NULL((void*)0)) return result;
2430 }
2431 *x = p->s.img_x;
2432 *y = p->s.img_y;
2433 if (n) *n = p->s.img_n;
2434 }
2435 free(p->out); p->out = NULL((void*)0);
2436 free(p->expanded); p->expanded = NULL((void*)0);
2437 free(p->idata); p->idata = NULL((void*)0);
2438
2439 return result;
2440}
2441
2442#ifndef STBI_NO_STDIO
2443unsigned char *stbi_png_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
2444{
2445 png p;
2446 start_file(&p.s, f);
2447 return do_png(&p, x,y,comp,req_comp);
2448}
2449
// Open 'filename' in binary mode, decode it as a PNG and close the file.
// Returns the pixel buffer, or NULL if the file cannot be opened or
// decoded.
unsigned char *stbi_png_load(char const *filename, int *x, int *y, int *comp, int req_comp)
{
   unsigned char *pixels;
   FILE *fp = fopen(filename, "rb");
   if (fp == NULL)
      return NULL;
   pixels = stbi_png_load_from_file(fp, x, y, comp, req_comp);
   fclose(fp);
   return pixels;
}
2459#endif
2460
2461unsigned char *stbi_png_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
2462{
2463 png p;
2464 start_mem(&p.s, buffer,len);
2465 return do_png(&p, x,y,comp,req_comp);
2466}
2467
2468#ifndef STBI_NO_STDIO
2469int stbi_png_test_file(FILE *f)
2470{
2471 png p;
2472 int n,r;
2473 n = ftell(f);
2474 start_file(&p.s, f);
2475 r = parse_png_file(&p, SCAN_type,STBI_default);
2476 fseek(f,n,SEEK_SET0);
2477 return r;
2478}
2479#endif
2480
2481int stbi_png_test_memory(stbi_uc const *buffer, int len)
2482{
2483 png p;
2484 start_mem(&p.s, buffer, len);
2485 return parse_png_file(&p, SCAN_type,STBI_default);
2486}
2487
2488// TODO: load header from png
2489#ifndef STBI_NO_STDIO
2490int stbi_png_info (char const *filename, int *x, int *y, int *comp)
2491{
2492 png p;
2493 FILE *f = fopen(filename, "rb");
2494 if (!f) return 0;
2495 start_file(&p.s, f);
2496 if (parse_png_file(&p, SCAN_header, 0)) {
2497 if(x) *x = p.s.img_x;
2498 if(y) *y = p.s.img_y;
2499 if (comp) *comp = p.s.img_n;
2500 fclose(f);
2501 return 1;
2502 }
2503 fclose(f);
2504 return 0;
2505}
2506
2507extern int stbi_png_info_from_file (FILE *f, int *x, int *y, int *comp);
2508#endif
2509extern int stbi_png_info_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp);
2510
2511// Microsoft/Windows BMP image
2512
2513static int bmp_test(stbi *s)
2514{
2515 int sz;
2516 if (get8(s) != 'B') return 0;
2517 if (get8(s) != 'M') return 0;
2518 get32le(s); // discard filesize
2519 get16le(s); // discard reserved
2520 get16le(s); // discard reserved
2521 get32le(s); // discard data offset
2522 sz = get32le(s);
2523 if (sz == 12 || sz == 40 || sz == 56 || sz == 108) return 1;
2524 return 0;
2525}
2526
2527#ifndef STBI_NO_STDIO
2528int stbi_bmp_test_file (FILE *f)
2529{
2530 stbi s;
2531 int r,n = ftell(f);
2532 start_file(&s,f);
2533 r = bmp_test(&s);
2534 fseek(f,n,SEEK_SET0);
2535 return r;
2536}
2537#endif
2538
2539int stbi_bmp_test_memory (stbi_uc const *buffer, int len)
2540{
2541 stbi s;
2542 start_mem(&s, buffer, len);
2543 return bmp_test(&s);
2544}
2545
2546// returns 0..31 for the highest set bit
// Index (0..31) of the highest set bit of z, or -1 when z is 0.
// Binary search: test the upper half of the remaining range and, if any
// bit is set there, account for it and shift it down.
static int high_bit(unsigned int z)
{
   int pos = 0;
   int step;
   if (z == 0)
      return -1;
   for (step = 16; step >= 1; step >>= 1) {
      if ((z >> step) != 0) {
         pos += step;
         z >>= step;
      }
   }
   return pos;
}
2558
// Number of set bits in a (0..32).
static int bitcount(unsigned int a)
{
   int count = 0;
   // each iteration clears the lowest set bit
   while (a != 0) {
      a &= a - 1;
      ++count;
   }
   return count;
}
2568
// Shift v right by 'shift' (left when shift is negative), then widen the
// 'bits'-wide result towards 8 bits by adding copies of it shifted right
// by bits, 2*bits, ... while that offset stays below 8.
// bits must be positive.
static int shiftsigned(int v, int shift, int bits)
{
   int total;
   int offset;

   if (shift >= 0)
      v >>= shift;
   else
      v <<= -shift;

   total = v;
   for (offset = bits; offset < 8; offset += bits)
      total += v >> offset;
   return total;
}
2585
2586static stbi_uc *bmp_load(stbi *s, int *x, int *y, int *comp, int req_comp)
2587{
2588 uint8 *out;
2589 unsigned int mr=0,mg=0,mb=0,ma=0;
2590 stbi_uc pal[256][4];
2591 int psize=0,i,j,compress=0,width;
2592 int bpp, flip_vertically, pad, target, offset, hsz;
2593 if (get8(s) != 'B' || get8(s) != 'M') return epuc("not BMP", "Corrupt BMP")((unsigned char *) (e("Corrupt BMP")?((void*)0):((void*)0)));
2594 get32le(s); // discard filesize
2595 get16le(s); // discard reserved
2596 get16le(s); // discard reserved
2597 offset = get32le(s);
2598 hsz = get32le(s);
2599 if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108) return epuc("unknown BMP", "BMP type not supported: unknown")((unsigned char *) (e("BMP type not supported: unknown")?((void
*)0):((void*)0)))
;
2600 failure_reason = "bad BMP";
2601 if (hsz == 12) {
2602 s->img_x = get16le(s);
2603 s->img_y = get16le(s);
2604 } else {
2605 s->img_x = get32le(s);
2606 s->img_y = get32le(s);
2607 }
2608 if (get16le(s) != 1) return 0;
2609 bpp = get16le(s);
2610 if (bpp == 1) return epuc("monochrome", "BMP type not supported: 1-bit")((unsigned char *) (e("BMP type not supported: 1-bit")?((void
*)0):((void*)0)))
;
2611 flip_vertically = ((int) s->img_y) > 0;
2612 s->img_y = abs((int) s->img_y);
2613 if (hsz == 12) {
2614 if (bpp < 24)
2615 psize = (offset - 14 - 24) / 3;
2616 } else {
2617 compress = get32le(s);
2618 if (compress == 1 || compress == 2) return epuc("BMP RLE", "BMP type not supported: RLE")((unsigned char *) (e("BMP type not supported: RLE")?((void*)
0):((void*)0)))
;
2619 get32le(s); // discard sizeof
2620 get32le(s); // discard hres
2621 get32le(s); // discard vres
2622 get32le(s); // discard colorsused
2623 get32le(s); // discard max important
2624 if (hsz == 40 || hsz == 56) {
2625 if (hsz == 56) {
2626 get32le(s);
2627 get32le(s);
2628 get32le(s);
2629 get32le(s);
2630 }
2631 if (bpp == 16 || bpp == 32) {
2632 mr = mg = mb = 0;
2633 if (compress == 0) {
2634 if (bpp == 32) {
2635 mr = 0xff << 16;
2636 mg = 0xff << 8;
2637 mb = 0xff << 0;
2638 ma = 0xff << 24;
2639 } else {
2640 mr = 31 << 10;
2641 mg = 31 << 5;
2642 mb = 31 << 0;
2643 }
2644 } else if (compress == 3) {
2645 mr = get32le(s);
2646 mg = get32le(s);
2647 mb = get32le(s);
2648 // not documented, but generated by photoshop and handled by mspaint
2649 if (mr == mg && mg == mb) {
2650 // ?!?!?
2651 return NULL((void*)0);
2652 }
2653 } else
2654 return NULL((void*)0);
2655 }
2656 } else {
2657 assert(hsz == 108)((void) (0));
2658 mr = get32le(s);
2659 mg = get32le(s);
2660 mb = get32le(s);
2661 ma = get32le(s);
2662 get32le(s); // discard color space
2663 for (i=0; i < 12; ++i)
2664 get32le(s); // discard color space parameters
2665 }
2666 if (bpp < 16)
2667 psize = (offset - 14 - hsz) >> 2;
2668 }
2669 s->img_n = ma ? 4 : 3;
2670 if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
2671 target = req_comp;
2672 else
2673 target = s->img_n; // if they want monochrome, we'll post-convert
2674 out = (stbi_uc *) malloc(target * s->img_x * s->img_y);
2675 if (!out) return epuc("outofmem", "Out of memory")((unsigned char *) (e("Out of memory")?((void*)0):((void*)0))
)
;
2676 if (bpp < 16) {
2677 int z=0;
2678 if (psize == 0 || psize > 256) { free(out); return epuc("invalid", "Corrupt BMP")((unsigned char *) (e("Corrupt BMP")?((void*)0):((void*)0))); }
2679 for (i=0; i < psize; ++i) {
2680 pal[i][2] = get8(s);
2681 pal[i][1] = get8(s);
2682 pal[i][0] = get8(s);
2683 if (hsz != 12) get8(s);
2684 pal[i][3] = 255;
2685 }
2686 skip(s, offset - 14 - hsz - psize * (hsz == 12 ? 3 : 4));
2687 if (bpp == 4) width = (s->img_x + 1) >> 1;
2688 else if (bpp == 8) width = s->img_x;
2689 else { free(out); return epuc("bad bpp", "Corrupt BMP")((unsigned char *) (e("Corrupt BMP")?((void*)0):((void*)0))); }
2690 pad = (-width)&3;
2691 for (j=0; j < (int) s->img_y; ++j) {
2692 for (i=0; i < (int) s->img_x; i += 2) {
2693 int v=get8(s),v2=0;
2694 if (bpp == 4) {
2695 v2 = v & 15;
2696 v >>= 4;
2697 }
2698 out[z++] = pal[v][0];
2699 out[z++] = pal[v][1];
2700 out[z++] = pal[v][2];
2701 if (target == 4) out[z++] = 255;
2702 if (i+1 == (int) s->img_x) break;
2703 v = (bpp == 8) ? get8(s) : v2;
2704 out[z++] = pal[v][0];
2705 out[z++] = pal[v][1];
2706 out[z++] = pal[v][2];
2707 if (target == 4) out[z++] = 255;
2708 }
2709 skip(s, pad);
2710 }
2711 } else {
2712 int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0;
2713 int z = 0;
2714 int easy=0;
2715 skip(s, offset - 14 - hsz);
2716 if (bpp == 24) width = 3 * s->img_x;
2717 else if (bpp == 16) width = 2*s->img_x;
2718 else /* bpp = 32 and pad = 0 */ width=0;
2719 pad = (-width) & 3;
2720 if (bpp == 24) {
2721 easy = 1;
2722 } else if (bpp == 32) {
2723 if (mb == 0xff && mg == 0xff00 && mr == 0xff000000 && ma == 0xff000000)
2724 easy = 2;
2725 }
2726 if (!easy) {
2727 if (!mr || !mg || !mb) return epuc("bad masks", "Corrupt BMP")((unsigned char *) (e("Corrupt BMP")?((void*)0):((void*)0)));
2728 // right shift amt to put high bit in position #7
2729 rshift = high_bit(mr)-7; rcount = bitcount(mr);
2730 gshift = high_bit(mg)-7; gcount = bitcount(mr);
2731 bshift = high_bit(mb)-7; bcount = bitcount(mr);
2732 ashift = high_bit(ma)-7; acount = bitcount(mr);
2733 }
2734 for (j=0; j < (int) s->img_y; ++j) {
2735 if (easy) {
2736 for (i=0; i < (int) s->img_x; ++i) {
2737 int a;
2738 out[z+2] = get8(s);
2739 out[z+1] = get8(s);
2740 out[z+0] = get8(s);
2741 z += 3;
2742 a = (easy == 2 ? get8(s) : 255);
2743 if (target == 4) out[z++] = a;
2744 }
2745 } else {
2746 for (i=0; i < (int) s->img_x; ++i) {
2747 uint32 v = (bpp == 16 ? get16le(s) : get32le(s));
2748 int a;
2749 out[z++] = shiftsigned(v & mr, rshift, rcount);
2750 out[z++] = shiftsigned(v & mg, gshift, gcount);
2751 out[z++] = shiftsigned(v & mb, bshift, bcount);
2752 a = (ma ? shiftsigned(v & ma, ashift, acount) : 255);
2753 if (target == 4) out[z++] = a;
2754 }
2755 }
2756 skip(s, pad);
2757 }
2758 }
2759 if (flip_vertically) {
2760 stbi_uc t;
2761 for (j=0; j < (int) s->img_y>>1; ++j) {
2762 stbi_uc *p1 = out + j *s->img_x*target;
2763 stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target;
2764 for (i=0; i < (int) s->img_x*target; ++i) {
2765 t = p1[i], p1[i] = p2[i], p2[i] = t;
2766 }
2767 }
2768 }
2769
2770 if (req_comp && req_comp != target) {
2771 out = convert_format(out, target, req_comp, s->img_x, s->img_y);
2772 if (out == NULL((void*)0)) return out; // convert_format frees input on failure
2773 }
2774
2775 *x = s->img_x;
2776 *y = s->img_y;
2777 if (comp) *comp = target;
2778 return out;
2779}
2780
2781#ifndef STBI_NO_STDIO
2782stbi_uc *stbi_bmp_load (char const *filename, int *x, int *y, int *comp, int req_comp)
2783{
2784 stbi_uc *data;
2785 FILE *f = fopen(filename, "rb");
2786 if (!f) return NULL((void*)0);
2787 data = stbi_bmp_load_from_file(f, x,y,comp,req_comp);
2788 fclose(f);
2789 return data;
2790}
2791
2792stbi_uc *stbi_bmp_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp)
2793{
2794 stbi s;
2795 start_file(&s, f);
2796 return bmp_load(&s, x,y,comp,req_comp);
2797}
2798#endif
2799
2800stbi_uc *stbi_bmp_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
2801{
2802 stbi s;
2803 start_mem(&s, buffer, len);
2804 return bmp_load(&s, x,y,comp,req_comp);
2805}
2806
2807// Targa Truevision - TGA
2808// by Jonathan Dummer
2809
2810static int tga_test(stbi *s)
2811{
2812 int sz;
2813 get8u(s); // discard Offset
2814 sz = get8u(s); // color type
2815 if( sz > 1 ) return 0; // only RGB or indexed allowed
2816 sz = get8u(s); // image type
2817 if( (sz != 1) && (sz != 2) && (sz != 3) && (sz != 9) && (sz != 10) && (sz != 11) ) return 0; // only RGB or grey allowed, +/- RLE
2818 get16(s); // discard palette start
2819 get16(s); // discard palette length
2820 get8(s); // discard bits per palette color entry
2821 get16(s); // discard x origin
2822 get16(s); // discard y origin
2823 if( get16(s) < 1 ) return 0; // test width
2824 if( get16(s) < 1 ) return 0; // test height
2825 sz = get8(s); // bits per pixel
2826 if( (sz != 8) && (sz != 16) && (sz != 24) && (sz != 32) ) return 0; // only RGB or RGBA or grey allowed
2827 return 1; // seems to have passed everything
2828}
2829
2830#ifndef STBI_NO_STDIO
2831int stbi_tga_test_file (FILE *f)
2832{
2833 stbi s;
2834 int r,n = ftell(f);
2835 start_file(&s, f);
2836 r = tga_test(&s);
2837 fseek(f,n,SEEK_SET0);
2838 return r;
2839}
2840#endif
2841
2842int stbi_tga_test_memory (stbi_uc const *buffer, int len)
2843{
2844 stbi s;
2845 start_mem(&s, buffer, len);
2846 return tga_test(&s);
2847}
2848
2849static stbi_uc *tga_load(stbi *s, int *x, int *y, int *comp, int req_comp)
2850{
2851 // read in the TGA header stuff
2852 int tga_offset = get8u(s);
2853 int tga_indexed = get8u(s);
2854 int tga_image_type = get8u(s);
2855 int tga_is_RLE = 0;
2856 int tga_palette_start = get16le(s);
2857 int tga_palette_len = get16le(s);
2858 int tga_palette_bits = get8u(s);
2859 int tga_x_origin = get16le(s);
2860 int tga_y_origin = get16le(s);
2861 int tga_width = get16le(s);
2862 int tga_height = get16le(s);
2863 int tga_bits_per_pixel = get8u(s);
2864 int tga_inverted = get8u(s);
2865 // image data
2866 unsigned char *tga_data;
2867 unsigned char *tga_palette = NULL((void*)0);
2868 int i, j;
2869 unsigned char raw_data[4];
2870 unsigned char trans_data[4] = {0,0,0,0};
2871 int RLE_count = 0;
2872 int RLE_repeating = 0;
2873 int read_next_pixel = 1;
2874 // do a tiny bit of precessing
2875 if( tga_image_type >= 8 )
2876 {
2877 tga_image_type -= 8;
2878 tga_is_RLE = 1;
2879 }
2880 /* int tga_alpha_bits = tga_inverted & 15; */
2881 tga_inverted = 1 - ((tga_inverted >> 5) & 1);
2882
2883 // error check
2884 if( //(tga_indexed) ||
2885 (tga_width < 1) || (tga_height < 1) ||
2886 (tga_image_type < 1) || (tga_image_type > 3) ||
2887 ((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16) &&
2888 (tga_bits_per_pixel != 24) && (tga_bits_per_pixel != 32))
2889 )
2890 {
2891 return NULL((void*)0);
2892 }
2893
2894 // If I'm paletted, then I'll use the number of bits from the palette
2895 if( tga_indexed )
2896 {
2897 tga_bits_per_pixel = tga_palette_bits;
2898 }
2899
2900 // tga info
2901 *x = tga_width;
2902 *y = tga_height;
2903 if( (req_comp < 1) || (req_comp > 4) )
2904 {
2905 // just use whatever the file was
2906 req_comp = tga_bits_per_pixel / 8;
2907 *comp = req_comp;
2908 } else
2909 {
2910 // force a new number of components
2911 *comp = tga_bits_per_pixel/8;
2912 }
2913 tga_data = (unsigned char*)malloc( tga_width * tga_height * req_comp );
2914
2915 // skip to the data's starting position (offset usually = 0)
2916 skip(s, tga_offset );
2917 // do I need to load a palette?
2918 if( tga_indexed )
2919 {
2920 // any data to skip? (offset usually = 0)
2921 skip(s, tga_palette_start );
2922 // load the palette
2923 tga_palette = (unsigned char*)malloc( tga_palette_len * tga_palette_bits / 8 );
2924 getn(s, tga_palette, tga_palette_len * tga_palette_bits / 8 );
2925 }
2926 // load the data
2927 for( i = 0; i < tga_width * tga_height; ++i )
2928 {
2929 // if I'm in RLE mode, do I need to get a RLE chunk?
2930 if( tga_is_RLE )
2931 {
2932 if( RLE_count == 0 )
2933 {
2934 // yep, get the next byte as a RLE command
2935 int RLE_cmd = get8u(s);
2936 RLE_count = 1 + (RLE_cmd & 127);
2937 RLE_repeating = RLE_cmd >> 7;
2938 read_next_pixel = 1;
2939 } else if( !RLE_repeating )
2940 {
2941 read_next_pixel = 1;
2942 }
2943 } else
2944 {
2945 read_next_pixel = 1;
2946 }
2947 // OK, if I need to read a pixel, do it now
2948 if( read_next_pixel )
2949 {
2950 // load however much data we did have
2951 if( tga_indexed )
2952 {
2953 // read in 1 byte, then perform the lookup
2954 int pal_idx = get8u(s);
2955 if( pal_idx >= tga_palette_len )
2956 {
2957 // invalid index
2958 pal_idx = 0;
2959 }
2960 pal_idx *= tga_bits_per_pixel / 8;
2961 for( j = 0; j*8 < tga_bits_per_pixel; ++j )
2962 {
2963 raw_data[j] = tga_palette[pal_idx+j];
2964 }
2965 } else
2966 {
2967 // read in the data raw
2968 for( j = 0; j*8 < tga_bits_per_pixel; ++j )
2969 {
2970 raw_data[j] = get8u(s);
2971 }
2972 }
2973 // convert raw to the intermediate format
2974 switch( tga_bits_per_pixel )
2975 {
2976 case 8:
2977 // Luminous => RGBA
2978 trans_data[0] = raw_data[0];
2979 trans_data[1] = raw_data[0];
2980 trans_data[2] = raw_data[0];
2981 trans_data[3] = 255;
2982 break;
2983 case 16:
2984 // Luminous,Alpha => RGBA
2985 trans_data[0] = raw_data[0];
2986 trans_data[1] = raw_data[0];
2987 trans_data[2] = raw_data[0];
2988 trans_data[3] = raw_data[1];
2989 break;
2990 case 24:
2991 // BGR => RGBA
2992 trans_data[0] = raw_data[2];
2993 trans_data[1] = raw_data[1];
2994 trans_data[2] = raw_data[0];
2995 trans_data[3] = 255;
2996 break;
2997 case 32:
2998 // BGRA => RGBA
2999 trans_data[0] = raw_data[2];
3000 trans_data[1] = raw_data[1];
3001 trans_data[2] = raw_data[0];
3002 trans_data[3] = raw_data[3];
3003 break;
3004 default:
3005 return NULL((void*)0);
3006 }
3007 // clear the reading flag for the next pixel
3008 read_next_pixel = 0;
3009 } // end of reading a pixel
3010 // convert to final format
3011 switch( req_comp )
3012 {
3013 case 1:
3014 // RGBA => Luminance
3015 tga_data[i*req_comp+0] = compute_y(trans_data[0],trans_data[1],trans_data[2]);
3016 break;
3017 case 2:
3018 // RGBA => Luminance,Alpha
3019 tga_data[i*req_comp+0] = compute_y(trans_data[0],trans_data[1],trans_data[2]);
3020 tga_data[i*req_comp+1] = trans_data[3];
3021 break;
3022 case 3:
3023 // RGBA => RGB
3024 tga_data[i*req_comp+0] = trans_data[0];
3025 tga_data[i*req_comp+1] = trans_data[1];
3026 tga_data[i*req_comp+2] = trans_data[2];
3027 break;
3028 case 4:
3029 // RGBA => RGBA
3030 tga_data[i*req_comp+0] = trans_data[0];
3031 tga_data[i*req_comp+1] = trans_data[1];
3032 tga_data[i*req_comp+2] = trans_data[2];
3033 tga_data[i*req_comp+3] = trans_data[3];
3034 break;
3035 }
3036 // in case we're in RLE mode, keep counting down
3037 --RLE_count;
3038 }
3039 // do I need to invert the image?
3040 if( tga_inverted )
3041 {
3042 for( j = 0; j*2 < tga_height; ++j )
3043 {
3044 int index1 = j * tga_width * req_comp;
3045 int index2 = (tga_height - 1 - j) * tga_width * req_comp;
3046 for( i = tga_width * req_comp; i > 0; --i )
3047 {
3048 unsigned char temp = tga_data[index1];
3049 tga_data[index1] = tga_data[index2];
3050 tga_data[index2] = temp;
3051 ++index1;
3052 ++index2;
3053 }
3054 }
3055 }
3056 // clear my palette, if I had one
3057 if( tga_palette != NULL((void*)0) )
3058 {
3059 free( tga_palette );
3060 }
3061 // the things I do to get rid of an error message, and yet keep
3062 // Microsoft's C compilers happy... [8^(
3063 tga_palette_start = tga_palette_len = tga_palette_bits =
3064 tga_x_origin = tga_y_origin = 0;
3065 // OK, done
3066 return tga_data;
3067}
3068
3069#ifndef STBI_NO_STDIO
3070stbi_uc *stbi_tga_load (char const *filename, int *x, int *y, int *comp, int req_comp)
3071{
3072 stbi_uc *data;
3073 FILE *f = fopen(filename, "rb");
3074 if (!f) return NULL((void*)0);
3075 data = stbi_tga_load_from_file(f, x,y,comp,req_comp);
3076 fclose(f);
3077 return data;
3078}
3079
3080stbi_uc *stbi_tga_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp)
3081{
3082 stbi s;
3083 start_file(&s, f);
3084 return tga_load(&s, x,y,comp,req_comp);
3085}
3086#endif
3087
3088stbi_uc *stbi_tga_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
3089{
3090 stbi s;
3091 start_mem(&s, buffer, len);
3092 return tga_load(&s, x,y,comp,req_comp);
3093}
3094
3095
3096// *************************************************************************************************
3097// Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicholas Schulz, tweaked by STB
3098
3099static int psd_test(stbi *s)
3100{
3101 if (get32(s) != 0x38425053) return 0; // "8BPS"
3102 else return 1;
3103}
3104
3105#ifndef STBI_NO_STDIO
3106int stbi_psd_test_file(FILE *f)
3107{
3108 stbi s;
3109 int r,n = ftell(f);
3110 start_file(&s, f);
3111 r = psd_test(&s);
3112 fseek(f,n,SEEK_SET0);
3113 return r;
3114}
3115#endif
3116
3117int stbi_psd_test_memory(stbi_uc const *buffer, int len)
3118{
3119 stbi s;
3120 start_mem(&s, buffer, len);
3121 return psd_test(&s);
3122}
3123
3124static stbi_uc *psd_load(stbi *s, int *x, int *y, int *comp, int req_comp)
3125{
3126 int pixelCount;
3127 int channelCount, compression;
3128 int channel, i, count, len;
3129 int w,h;
3130 uint8 *out;
3131
3132 // Check identifier
3133 if (get32(s) != 0x38425053) // "8BPS"
3134 return epuc("not PSD", "Corrupt PSD image")((unsigned char *) (e("Corrupt PSD image")?((void*)0):((void*
)0)))
;
3135
3136 // Check file type version.
3137 if (get16(s) != 1)
3138 return epuc("wrong version", "Unsupported version of PSD image")((unsigned char *) (e("Unsupported version of PSD image")?((void
*)0):((void*)0)))
;
3139
3140 // Skip 6 reserved bytes.
3141 skip(s, 6 );
3142
3143 // Read the number of channels (R, G, B, A, etc).
3144 channelCount = get16(s);
3145 if (channelCount < 0 || channelCount > 16)
3146 return epuc("wrong channel count", "Unsupported number of channels in PSD image")((unsigned char *) (e("Unsupported number of channels in PSD image"
)?((void*)0):((void*)0)))
;
3147
3148 // Read the rows and columns of the image.
3149 h = get32(s);
3150 w = get32(s);
3151
3152 // Make sure the depth is 8 bits.
3153 if (get16(s) != 8)
3154 return epuc("unsupported bit depth", "PSD bit depth is not 8 bit")((unsigned char *) (e("PSD bit depth is not 8 bit")?((void*)0
):((void*)0)))
;
3155
3156 // Make sure the color mode is RGB.
3157 // Valid options are:
3158 // 0: Bitmap
3159 // 1: Grayscale
3160 // 2: Indexed color
3161 // 3: RGB color
3162 // 4: CMYK color
3163 // 7: Multichannel
3164 // 8: Duotone
3165 // 9: Lab color
3166 if (get16(s) != 3)
3167 return epuc("wrong color format", "PSD is not in RGB color format")((unsigned char *) (e("PSD is not in RGB color format")?((void
*)0):((void*)0)))
;
3168
3169 // Skip the Mode Data. (It's the palette for indexed color; other info for other modes.)
3170 skip(s,get32(s) );
3171
3172 // Skip the image resources. (resolution, pen tool paths, etc)
3173 skip(s, get32(s) );
3174
3175 // Skip the reserved data.
3176 skip(s, get32(s) );
3177
3178 // Find out if the data is compressed.
3179 // Known values:
3180 // 0: no compression
3181 // 1: RLE compressed
3182 compression = get16(s);
3183 if (compression > 1)
3184 return epuc("bad compression", "PSD has an unknown compression format")((unsigned char *) (e("PSD has an unknown compression format"
)?((void*)0):((void*)0)))
;
3185
3186 // Create the destination image.
3187 out = (stbi_uc *) malloc(4 * w*h);
3188 if (!out) return epuc("outofmem", "Out of memory")((unsigned char *) (e("Out of memory")?((void*)0):((void*)0))
)
;
3189 pixelCount = w*h;
3190
3191 // Initialize the data to zero.
3192 //memset( out, 0, pixelCount * 4 );
3193
3194 // Finally, the image data.
3195 if (compression) {
3196 // RLE as used by .PSD and .TIFF
3197 // Loop until you get the number of unpacked bytes you are expecting:
3198 // Read the next source byte into n.
3199 // If n is between 0 and 127 inclusive, copy the next n+1 bytes literally.
3200 // Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times.
3201 // Else if n is 128, noop.
3202 // Endloop
3203
3204 // The RLE-compressed data is preceeded by a 2-byte data count for each row in the data,
3205 // which we're going to just skip.
3206 skip(s, h * channelCount * 2 );
3207
3208 // Read the RLE data by channel.
3209 for (channel = 0; channel < 4; channel++) {
3210 uint8 *p;
3211
3212 p = out+channel;
3213 if (channel >= channelCount) {
3214 // Fill this channel with default data.
3215 for (i = 0; i < pixelCount; i++) *p = (channel == 3 ? 255 : 0), p += 4;
3216 } else {
3217 // Read the RLE data.
3218 count = 0;
3219 while (count < pixelCount) {
3220 len = get8(s);
3221 if (len == 128) {
3222 // No-op.
3223 } else if (len < 128) {
3224 // Copy next len+1 bytes literally.
3225 len++;
3226 count += len;
3227 while (len) {
3228 *p = get8(s);
3229 p += 4;
3230 len--;
3231 }
3232 } else if (len > 128) {
3233 uint32 val;
3234 // Next -len+1 bytes in the dest are replicated from next source byte.
3235 // (Interpret len as a negative 8-bit int.)
3236 len ^= 0x0FF;
3237 len += 2;
3238 val = get8(s);
3239 count += len;
3240 while (len) {
3241 *p = val;
3242 p += 4;
3243 len--;
3244 }
3245 }
3246 }
3247 }
3248 }
3249
3250 } else {
3251 // We're at the raw image data. It's each channel in order (Red, Green, Blue, Alpha, ...)
3252 // where each channel consists of an 8-bit value for each pixel in the image.
3253
3254 // Read the data by channel.
3255 for (channel = 0; channel < 4; channel++) {
3256 uint8 *p;
3257
3258 p = out + channel;
3259 if (channel > channelCount) {
3260 // Fill this channel with default data.
3261 for (i = 0; i < pixelCount; i++) *p = channel == 3 ? 255 : 0, p += 4;
3262 } else {
3263 // Read the data.
3264 count = 0;
Value stored to 'count' is never read
3265 for (i = 0; i < pixelCount; i++)
3266 *p = get8(s), p += 4;
3267 }
3268 }
3269 }
3270
3271 if (req_comp && req_comp != 4) {
3272 out = convert_format(out, 4, req_comp, w, h);
3273 if (out == NULL((void*)0)) return out; // convert_format frees input on failure
3274 }
3275
3276 if (comp) *comp = channelCount;
3277 *y = h;
3278 *x = w;
3279
3280 return out;
3281}
3282
3283#ifndef STBI_NO_STDIO
3284stbi_uc *stbi_psd_load(char const *filename, int *x, int *y, int *comp, int req_comp)
3285{
3286 stbi_uc *data;
3287 FILE *f = fopen(filename, "rb");
3288 if (!f) return NULL((void*)0);
3289 data = stbi_psd_load_from_file(f, x,y,comp,req_comp);
3290 fclose(f);
3291 return data;
3292}
3293
3294stbi_uc *stbi_psd_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
3295{
3296 stbi s;
3297 start_file(&s, f);
3298 return psd_load(&s, x,y,comp,req_comp);
3299}
3300#endif
3301
3302stbi_uc *stbi_psd_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
3303{
3304 stbi s;
3305 start_mem(&s, buffer, len);
3306 return psd_load(&s, x,y,comp,req_comp);
3307}
3308
3309
3310// *************************************************************************************************
3311// Radiance RGBE HDR loader
3312// originally by Nicolas Schulz
3313#ifndef STBI_NO_HDR
3314static int hdr_test(stbi *s)
3315{
3316 const char *signature = "#?RADIANCE\n";
3317 int i;
3318 for (i=0; signature[i]; ++i)
3319 if (get8(s) != signature[i])
3320 return 0;
3321 return 1;
3322}
3323
3324int stbi_hdr_test_memory(stbi_uc const *buffer, int len)
3325{
3326 stbi s;
3327 start_mem(&s, buffer, len);
3328 return hdr_test(&s);
3329}
3330
3331#ifndef STBI_NO_STDIO
3332int stbi_hdr_test_file(FILE *f)
3333{
3334 stbi s;
3335 int r,n = ftell(f);
3336 start_file(&s, f);
3337 r = hdr_test(&s);
3338 fseek(f,n,SEEK_SET0);
3339 return r;
3340}
3341#endif
3342
3343#define HDR_BUFLEN1024 1024
3344static char *hdr_gettoken(stbi *z, char *buffer)
3345{
3346 int len=0;
3347 char c = '\0';
3348
3349 c = get8(z);
3350
3351 while (!at_eof(z) && c != '\n') {
3352 buffer[len++] = c;
3353 if (len == HDR_BUFLEN1024-1) {
3354 // flush to end of line
3355 while (!at_eof(z) && get8(z) != '\n')
3356 ;
3357 break;
3358 }
3359 c = get8(z);
3360 }
3361
3362 buffer[len] = 0;
3363 return buffer;
3364}
3365
3366static void hdr_convert(float *output, stbi_uc *input, int req_comp)
3367{
3368 if( input[3] != 0 ) {
3369 float f1;
3370 // Exponent
3371 f1 = (float) ldexp(1.0f, input[3] - (int)(128 + 8));
3372 if (req_comp <= 2)
3373 output[0] = (input[0] + input[1] + input[2]) * f1 / 3;
3374 else {
3375 output[0] = input[0] * f1;
3376 output[1] = input[1] * f1;
3377 output[2] = input[2] * f1;
3378 }
3379 if (req_comp == 2) output[1] = 1;
3380 if (req_comp == 4) output[3] = 1;
3381 } else {
3382 switch (req_comp) {
3383 case 4: output[3] = 1; /* fallthrough */
3384 case 3: output[0] = output[1] = output[2] = 0;
3385 break;
3386 case 2: output[1] = 1; /* fallthrough */
3387 case 1: output[0] = 0;
3388 break;
3389 }
3390 }
3391}
3392
3393
3394static float *hdr_load(stbi *s, int *x, int *y, int *comp, int req_comp)
3395{
3396 char buffer[HDR_BUFLEN1024];
3397 char *token;
3398 int valid = 0;
3399 int width, height;
3400 stbi_uc *scanline;
3401 float *hdr_data;
3402 int len;
3403 unsigned char count, value;
3404 int i, j, k, c1,c2, z;
3405
3406
3407 // Check identifier
3408 if (strcmp(hdr_gettoken(s,buffer), "#?RADIANCE") != 0)
3409 return epf("not HDR", "Corrupt HDR image")((float *) (e("Corrupt HDR image")?((void*)0):((void*)0)));
3410
3411 // Parse header
3412 while(1) {
3413 token = hdr_gettoken(s,buffer);
3414 if (token[0] == 0) break;
3415 if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
3416 }
3417
3418 if (!valid) return epf("unsupported format", "Unsupported HDR format")((float *) (e("Unsupported HDR format")?((void*)0):((void*)0)
))
;
3419
3420 // Parse width and height
3421 // can't use sscanf() if we're not using stdio!
3422 token = hdr_gettoken(s,buffer);
3423 if (strncmp(token, "-Y ", 3)) return epf("unsupported data layout", "Unsupported HDR format")((float *) (e("Unsupported HDR format")?((void*)0):((void*)0)
))
;
3424 token += 3;
3425 height = strtol(token, &token, 10);
3426 while (*token == ' ') ++token;
3427 if (strncmp(token, "+X ", 3)) return epf("unsupported data layout", "Unsupported HDR format")((float *) (e("Unsupported HDR format")?((void*)0):((void*)0)
))
;
3428 token += 3;
3429 width = strtol(token, NULL((void*)0), 10);
3430
3431 *x = width;
3432 *y = height;
3433
3434 *comp = 3;
3435 if (req_comp == 0) req_comp = 3;
3436
3437 // Read data
3438 hdr_data = (float *) malloc(height * width * req_comp * sizeof(float));
3439
3440 // Load image data
3441 // image data is stored as some number of sca
3442 if( width < 8 || width >= 32768) {
3443 // Read flat data
3444 for (j=0; j < height; ++j) {
3445 for (i=0; i < width; ++i) {
3446 stbi_uc rgbe[4];
3447 main_decode_loop:
3448 getn(s, rgbe, 4);
3449 hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp);
3450 }
3451 }
3452 } else {
3453 // Read RLE-encoded data
3454 scanline = NULL((void*)0);
3455
3456 for (j = 0; j < height; ++j) {
3457 c1 = get8(s);
3458 c2 = get8(s);
3459 len = get8(s);
3460 if (c1 != 2 || c2 != 2 || (len & 0x80)) {
3461 // not run-length encoded, so we have to actually use THIS data as a decoded
3462 // pixel (note this can't be a valid pixel--one of RGB must be >= 128)
3463 stbi_uc rgbe[4] = { c1,c2,len, get8(s) };
3464 hdr_convert(hdr_data, rgbe, req_comp);
3465 i = 1;
3466 j = 0;
3467 free(scanline);
3468 goto main_decode_loop; // yes, this is fucking insane; blame the fucking insane format
3469 }
3470 len <<= 8;
3471 len |= get8(s);
3472 if (len != width) { free(hdr_data); free(scanline); return epf("invalid decoded scanline length", "corrupt HDR")((float *) (e("corrupt HDR")?((void*)0):((void*)0))); }
3473 if (scanline == NULL((void*)0)) scanline = (stbi_uc *) malloc(width * 4);
3474
3475 for (k = 0; k < 4; ++k) {
3476 i = 0;
3477 while (i < width) {
3478 count = get8(s);
3479 if (count > 128) {
3480 // Run
3481 value = get8(s);
3482 count -= 128;
3483 for (z = 0; z < count; ++z)
3484 scanline[i++ * 4 + k] = value;
3485 } else {
3486 // Dump
3487 for (z = 0; z < count; ++z)
3488 scanline[i++ * 4 + k] = get8(s);
3489 }
3490 }
3491 }
3492 for (i=0; i < width; ++i)
3493 hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp);
3494 }
3495 free(scanline);
3496 }
3497
3498 return hdr_data;
3499}
3500
3501static stbi_uc *hdr_load_rgbe(stbi *s, int *x, int *y, int *comp, int req_comp)
3502{
3503 char buffer[HDR_BUFLEN1024];
3504 char *token;
3505 int valid = 0;
3506 int width, height;
3507 stbi_uc *scanline;
3508 stbi_uc *rgbe_data;
3509 int len;
3510 unsigned char count, value;
3511 int i, j, k, c1,c2, z;
3512
3513
3514 // Check identifier
3515 if (strcmp(hdr_gettoken(s,buffer), "#?RADIANCE") != 0)
3516 return epuc("not HDR", "Corrupt HDR image")((unsigned char *) (e("Corrupt HDR image")?((void*)0):((void*
)0)))
;
3517
3518 // Parse header
3519 while(1) {
3520 token = hdr_gettoken(s,buffer);
3521 if (token[0] == 0) break;
3522 if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
3523 }
3524
3525 if (!valid) return epuc("unsupported format", "Unsupported HDR format")((unsigned char *) (e("Unsupported HDR format")?((void*)0):((
void*)0)))
;
3526
3527 // Parse width and height
3528 // can't use sscanf() if we're not using stdio!
3529 token = hdr_gettoken(s,buffer);
3530 if (strncmp(token, "-Y ", 3)) return epuc("unsupported data layout", "Unsupported HDR format")((unsigned char *) (e("Unsupported HDR format")?((void*)0):((
void*)0)))
;
3531 token += 3;
3532 height = strtol(token, &token, 10);
3533 while (*token == ' ') ++token;
3534 if (strncmp(token, "+X ", 3)) return epuc("unsupported data layout", "Unsupported HDR format")((unsigned char *) (e("Unsupported HDR format")?((void*)0):((
void*)0)))
;
3535 token += 3;
3536 width = strtol(token, NULL((void*)0), 10);
3537
3538 *x = width;
3539 *y = height;
3540
3541 // RGBE _MUST_ come out as 4 components
3542 *comp = 4;
3543 req_comp = 4;
3544
3545 // Read data
3546 rgbe_data = (stbi_uc *) malloc(height * width * req_comp * sizeof(stbi_uc));
3547 // point to the beginning
3548 scanline = rgbe_data;
3549
3550 // Load image data
3551 // image data is stored as some number of scan lines
3552 if( width < 8 || width >= 32768) {
3553 // Read flat data
3554 for (j=0; j < height; ++j) {
3555 for (i=0; i < width; ++i) {
3556 main_decode_loop:
3557 //getn(rgbe, 4);
3558 getn(s,scanline, 4);
3559 scanline += 4;
3560 }
3561 }
3562 } else {
3563 // Read RLE-encoded data
3564 for (j = 0; j < height; ++j) {
3565 c1 = get8(s);
3566 c2 = get8(s);
3567 len = get8(s);
3568 if (c1 != 2 || c2 != 2 || (len & 0x80)) {
3569 // not run-length encoded, so we have to actually use THIS data as a decoded
3570 // pixel (note this can't be a valid pixel--one of RGB must be >= 128)
3571 scanline[0] = c1;
3572 scanline[1] = c2;
3573 scanline[2] = len;
3574 scanline[3] = get8(s);
3575 scanline += 4;
3576 i = 1;
3577 j = 0;
3578 goto main_decode_loop; // yes, this is insane; blame the insane format
3579 }
3580 len <<= 8;
3581 len |= get8(s);
3582 if (len != width) { free(rgbe_data); return epuc("invalid decoded scanline length", "corrupt HDR")((unsigned char *) (e("corrupt HDR")?((void*)0):((void*)0))); }
3583 for (k = 0; k < 4; ++k) {
3584 i = 0;
3585 while (i < width) {
3586 count = get8(s);
3587 if (count > 128) {
3588 // Run
3589 value = get8(s);
3590 count -= 128;
3591 for (z = 0; z < count; ++z)
3592 scanline[i++ * 4 + k] = value;
3593 } else {
3594 // Dump
3595 for (z = 0; z < count; ++z)
3596 scanline[i++ * 4 + k] = get8(s);
3597 }
3598 }
3599 }
3600 // move the scanline on
3601 scanline += 4 * width;
3602 }
3603 }
3604
3605 return rgbe_data;
3606}
3607
3608#ifndef STBI_NO_STDIO
3609float *stbi_hdr_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
3610{
3611 stbi s;
3612 start_file(&s,f);
3613 return hdr_load(&s,x,y,comp,req_comp);
3614}
3615
3616stbi_uc *stbi_hdr_load_rgbe_file(FILE *f, int *x, int *y, int *comp, int req_comp)
3617{
3618 stbi s;
3619 start_file(&s,f);
3620 return hdr_load_rgbe(&s,x,y,comp,req_comp);
3621}
3622
3623stbi_uc *stbi_hdr_load_rgbe (char const *filename, int *x, int *y, int *comp, int req_comp)
3624{
3625 FILE *f = fopen(filename, "rb");
3626 unsigned char *result;
3627 if (!f) return epuc("can't fopen", "Unable to open file")((unsigned char *) (e("Unable to open file")?((void*)0):((void
*)0)))
;
3628 result = stbi_hdr_load_rgbe_file(f,x,y,comp,req_comp);
3629 fclose(f);
3630 return result;
3631}
3632#endif
3633
3634float *stbi_hdr_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
3635{
3636 stbi s;
3637 start_mem(&s,buffer, len);
3638 return hdr_load(&s,x,y,comp,req_comp);
3639}
3640
3641stbi_uc *stbi_hdr_load_rgbe_memory(stbi_uc *buffer, int len, int *x, int *y, int *comp, int req_comp)
3642{
3643 stbi s;
3644 start_mem(&s,buffer, len);
3645 return hdr_load_rgbe(&s,x,y,comp,req_comp);
3646}
3647
3648#endif // STBI_NO_HDR
3649
3650/////////////////////// write image ///////////////////////
3651
3652#ifndef STBI_NO_WRITE
3653
3654static void write8(FILE *f, int x) { uint8 z = (uint8) x; fwrite(&z,1,1,f); }
3655
3656static void writefv(FILE *f, const char *fmt, va_list v)
3657{
3658 while (*fmt) {
3659 switch (*fmt++) {
3660 case ' ': break;
3661 case '1': { uint8 x = va_arg(v, int)__builtin_va_arg(v, int); write8(f,x); break; }
3662 case '2': { int16 x = va_arg(v, int)__builtin_va_arg(v, int); write8(f,x); write8(f,x>>8); break; }
3663 case '4': { int32 x = va_arg(v, int)__builtin_va_arg(v, int); write8(f,x); write8(f,x>>8); write8(f,x>>16); write8(f,x>>24); break; }
3664 default:
3665 assert(0)((void) (0));
3666 va_end(v)__builtin_va_end(v);
3667 return;
3668 }
3669 }
3670}
3671
// Variadic front end for writefv: writes the int arguments following `fmt`
// to `f` according to the format characters in `fmt`.
static void writef(FILE *f, const char *fmt, ...)
{
   va_list args;

   va_start(args, fmt);
   writefv(f, fmt, args);
   va_end(args);
}
3679
3680static void write_pixels(FILE *f, int rgb_dir, int vdir, int x, int y, int comp, void *data, int write_alpha, int scanline_pad)
3681{
3682 uint8 bg[3] = { 255, 0, 255}, px[3];
3683 uint32 zero = 0;
3684 int i,j,k, j_end;
3685
3686 if (vdir < 0)
3687 j_end = -1, j = y-1;
3688 else
3689 j_end = y, j = 0;
3690
3691 for (; j != j_end; j += vdir) {
3692 for (i=0; i < x; ++i) {
3693 uint8 *d = (uint8 *) data + (j*x+i)*comp;
3694 if (write_alpha < 0)
3695 fwrite(&d[comp-1], 1, 1, f);
3696 switch (comp) {
3697 case 1:
3698 case 2: writef(f, "111", d[0],d[0],d[0]);
3699 break;
3700 case 4:
3701 if (!write_alpha) {
3702 for (k=0; k < 3; ++k)
3703 px[k] = bg[k] + ((d[k] - bg[k]) * d[3])/255;
3704 writef(f, "111", px[1-rgb_dir],px[1],px[1+rgb_dir]);
3705 break;
3706 }
3707 /* FALLTHROUGH */
3708 case 3:
3709 writef(f, "111", d[1-rgb_dir],d[1],d[1+rgb_dir]);
3710 break;
3711 }
3712 if (write_alpha > 0)
3713 fwrite(&d[comp-1], 1, 1, f);
3714 }
3715 fwrite(&zero,scanline_pad,1,f);
3716 }
3717}
3718
// Create `filename`, write a header described by `fmt` and the trailing int
// arguments (see writefv), then write the pixel data via write_pixels.
// Returns 1 if the file could be opened for writing, 0 otherwise.
static int outfile(char const *filename, int rgb_dir, int vdir, int x, int y, int comp, void *data, int alpha, int pad, const char *fmt, ...)
{
   va_list args;
   FILE *fp = fopen(filename, "wb");

   if (fp == NULL)
      return 0;

   va_start(args, fmt);
   writefv(fp, fmt, args);
   va_end(args);

   write_pixels(fp, rgb_dir, vdir, x, y, comp, data, alpha, pad);
   fclose(fp);
   return 1;
}
3732
// Write an x-by-y image with `comp` components per pixel to `filename` as a
// 24-bit BMP. Rows are padded to a multiple of 4 bytes.
// Returns outfile's result (non-zero if the file could be opened).
int stbi_write_bmp(char const *filename, int x, int y, int comp, void *data)
{
   int pad          = (-x*3) & 3;              // bytes to round a row up to 4
   int header_bytes = 14+40;                   // file header + bitmap header
   int file_bytes   = header_bytes + (x*3+pad)*y;

   return outfile(filename,-1,-1,x,y,comp,data,0,pad,
           "11 4 22 4" "4 44 22 444444",
           'B', 'M', file_bytes, 0,0, header_bytes,  // file header
            40, x,y, 1,24, 0,0,0,0,0,0);             // bitmap header
}
3741
// Write an x-by-y image with `comp` components per pixel to `filename` as an
// uncompressed TGA (image type 2). An even `comp` is treated as "has alpha"
// and produces a 32-bit file, otherwise a 24-bit one.
// Returns outfile's result (non-zero if the file could be opened).
int stbi_write_tga(char const *filename, int x, int y, int comp, void *data)
{
   int with_alpha = ((comp & 1) == 0);
   int pixel_bits = 24 + 8*with_alpha;
   int alpha_bits = 8*with_alpha;

   return outfile(filename, -1,-1, x, y, comp, data, with_alpha, 0,
                  "111 221 2222 11", 0,0,2, 0,0,0, 0,0,x,y, pixel_bits, alpha_bits);
}
3748
3749// any other image formats that do interleaved rgb data?
3750// PNG: requires adler32,crc32 -- significant amount of code
3751// PSD: no, channels output separately
3752// TIFF: no, stripwise-interleaved... i think
3753
3754#endif // STBI_NO_WRITE
3755
3756// add in my DDS loading support
3757#ifndef STBI_NO_DDS
3758#include "stbi_DDS_aug_c.h"
3759#endif