All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
stb_image.h
1 /* stb_image - v2.19 - public domain image loader - http://nothings.org/stb
2  no warranty implied; use at your own risk
3 
4  Do this:
5  #define STB_IMAGE_IMPLEMENTATION
6  before you include this file in *one* C or C++ file to create the implementation.
7 
8  // i.e. it should look like this:
9  #include ...
10  #include ...
11  #include ...
12  #define STB_IMAGE_IMPLEMENTATION
13  #include "stb_image.h"
14 
15  You can #define STBI_ASSERT(x) before the #include to avoid using assert.h.
16  And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free
17 
18 
19  QUICK NOTES:
20  Primarily of interest to game developers and other people who can
21  avoid problematic images and only need the trivial interface
22 
23  JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib)
24  PNG 1/2/4/8/16-bit-per-channel
25 
26  TGA (not sure what subset, if a subset)
27  BMP non-1bpp, non-RLE
28  PSD (composited view only, no extra channels, 8/16 bit-per-channel)
29 
30  GIF (*comp always reports as 4-channel)
31  HDR (radiance rgbE format)
32  PIC (Softimage PIC)
33  PNM (PPM and PGM binary only)
34 
35  Animated GIF still needs a proper API, but here's one way to do it:
36  http://gist.github.com/urraka/685d9a6340b26b830d49
37 
38  - decode from memory or through FILE (define STBI_NO_STDIO to remove code)
39  - decode from arbitrary I/O callbacks
40  - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON)
41 
42  Full documentation under "DOCUMENTATION" below.
43 
44 
45 LICENSE
46 
47  See end of file for license information.
48 
49 RECENT REVISION HISTORY:
50 
51  2.19 (2018-02-11) fix warning
52  2.18 (2018-01-30) fix warnings
53  2.17 (2018-01-29) bugfix, 1-bit BMP, 16-bitness query, fix warnings
54  2.16 (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes
55  2.15 (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC
56  2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
57  2.13 (2016-12-04) experimental 16-bit API, only for PNG so far; fixes
58  2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
59  2.11 (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64
60  RGB-format JPEG; remove white matting in PSD;
61  allocate large structures on the stack;
62  correct channel count for PNG & BMP
63  2.10 (2016-01-22) avoid warning introduced in 2.09
64  2.09 (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED
65 
66  See end of file for full revision history.
67 
68 
69  ============================ Contributors =========================
70 
71  Image formats Extensions, features
72  Sean Barrett (jpeg, png, bmp) Jetro Lauha (stbi_info)
73  Nicolas Schulz (hdr, psd) Martin "SpartanJ" Golini (stbi_info)
74  Jonathan Dummer (tga) James "moose2000" Brown (iPhone PNG)
75  Jean-Marc Lienher (gif) Ben "Disch" Wenger (io callbacks)
76  Tom Seddon (pic) Omar Cornut (1/2/4-bit PNG)
77  Thatcher Ulrich (psd) Nicolas Guillemot (vertical flip)
78  Ken Miller (pgm, ppm) Richard Mitton (16-bit PSD)
79  github:urraka (animated gif) Junggon Kim (PNM comments)
80  Christopher Forseth (animated gif) Daniel Gibson (16-bit TGA)
81  socks-the-fox (16-bit PNG)
82  Jeremy Sawicki (handle all ImageNet JPGs)
83  Optimizations & bugfixes Mikhail Morozov (1-bit BMP)
84  Fabian "ryg" Giesen Anael Seghezzi (is-16-bit query)
85  Arseny Kapoulkine
86  John-Mark Allen
87 
88  Bug & warning fixes
89  Marc LeBlanc David Woo Guillaume George Martins Mozeiko
90  Christpher Lloyd Jerry Jansson Joseph Thomson Phil Jordan
91  Dave Moore Roy Eltham Hayaki Saito Nathan Reed
92  Won Chun Luke Graham Johan Duparc Nick Verigakis
93  the Horde3D community Thomas Ruf Ronny Chevalier github:rlyeh
94  Janez Zemva John Bartholomew Michal Cichon github:romigrou
95  Jonathan Blow Ken Hamada Tero Hanninen github:svdijk
96  Laurent Gomila Cort Stratton Sergio Gonzalez github:snagar
97  Aruelien Pocheville Thibault Reuille Cass Everitt github:Zelex
98  Ryamond Barbiero Paul Du Bois Engin Manap github:grim210
99  Aldo Culquicondor Philipp Wiesemann Dale Weiler github:sammyhw
100  Oriol Ferrer Mesia Josh Tobin Matthew Gregan github:phprus
101  Julian Raschke Gregory Mullen Baldur Karlsson github:poppolopoppo
102  Christian Floisand Kevin Schmidt github:darealshinji
103  Blazej Dariusz Roszkowski github:Michaelangel007
104 */
105 
106 #ifndef STBI_INCLUDE_STB_IMAGE_H
107 #define STBI_INCLUDE_STB_IMAGE_H
108 
109 // DOCUMENTATION
110 //
111 // Limitations:
112 // - no 12-bit-per-channel JPEG
113 // - no JPEGs with arithmetic coding
114 // - GIF always returns *comp=4
115 //
116 // Basic usage (see HDR discussion below for HDR usage):
117 // int x,y,n;
118 // unsigned char *data = stbi_load(filename, &x, &y, &n, 0);
119 // // ... process data if not NULL ...
120 // // ... x = width, y = height, n = # 8-bit components per pixel ...
121 // // ... replace '0' with '1'..'4' to force that many components per pixel
122 // // ... but 'n' will always be the number that it would have been if you said 0
123 // stbi_image_free(data)
124 //
125 // Standard parameters:
126 // int *x -- outputs image width in pixels
127 // int *y -- outputs image height in pixels
128 // int *channels_in_file -- outputs # of image components in image file
129 // int desired_channels -- if non-zero, # of image components requested in result
130 //
131 // The return value from an image loader is an 'unsigned char *' which points
132 // to the pixel data, or NULL on an allocation failure or if the image is
133 // corrupt or invalid. The pixel data consists of *y scanlines of *x pixels,
134 // with each pixel consisting of N interleaved 8-bit components; the first
135 // pixel pointed to is top-left-most in the image. There is no padding between
136 // image scanlines or between pixels, regardless of format. The number of
137 // components N is 'desired_channels' if desired_channels is non-zero, or
138 // *channels_in_file otherwise. If desired_channels is non-zero,
139 // *channels_in_file has the number of components that _would_ have been
140 // output otherwise. E.g. if you set desired_channels to 4, you will always
141 // get RGBA output, but you can check *channels_in_file to see if it's trivially
142 // opaque because e.g. there were only 3 channels in the source image.
143 //
144 // An output image with N components has the following components interleaved
145 // in this order in each pixel:
146 //
147 // N=#comp components
148 // 1 grey
149 // 2 grey, alpha
150 // 3 red, green, blue
151 // 4 red, green, blue, alpha
152 //
153 // If image loading fails for any reason, the return value will be NULL,
154 // and *x, *y, *channels_in_file will be unchanged. The function
155 // stbi_failure_reason() can be queried for an extremely brief, end-user
156 // unfriendly explanation of why the load failed. Define STBI_NO_FAILURE_STRINGS
157 // to avoid compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly
158 // more user-friendly ones.
159 //
160 // Paletted PNG, BMP, GIF, and PIC images are automatically depalettized.
161 //
162 // ===========================================================================
163 //
164 // Philosophy
165 //
166 // stb libraries are designed with the following priorities:
167 //
168 // 1. easy to use
169 // 2. easy to maintain
170 // 3. good performance
171 //
172 // Sometimes I let "good performance" creep up in priority over "easy to maintain",
173 // and for best performance I may provide less-easy-to-use APIs that give higher
174 // performance, in addition to the easy to use ones. Nevertheless, it's important
175 // to keep in mind that from the standpoint of you, a client of this library,
176 // all you care about is #1 and #3, and stb libraries DO NOT emphasize #3 above all.
177 //
178 // Some secondary priorities arise directly from the first two, some of which
179 // make more explicit reasons why performance can't be emphasized.
180 //
181 // - Portable ("ease of use")
182 // - Small source code footprint ("easy to maintain")
183 // - No dependencies ("ease of use")
184 //
185 // ===========================================================================
186 //
187 // I/O callbacks
188 //
189 // I/O callbacks allow you to read from arbitrary sources, like packaged
190 // files or some other source. Data read from callbacks are processed
191 // through a small internal buffer (currently 128 bytes) to try to reduce
192 // overhead.
193 //
194 // The three functions you must define are "read" (reads some bytes of data),
195 // "skip" (skips some bytes of data), "eof" (reports if the stream is at the end).
196 //
197 // ===========================================================================
198 //
199 // SIMD support
200 //
201 // The JPEG decoder will try to automatically use SIMD kernels on x86 when
202 // supported by the compiler. For ARM Neon support, you must explicitly
203 // request it.
204 //
205 // (The old do-it-yourself SIMD API is no longer supported in the current
206 // code.)
207 //
208 // On x86, SSE2 is used automatically when available (MSVC: run-time test; GCC/Clang:
209 // only if enabled at compile time, e.g. -msse2); otherwise the generic C versions are used. On ARM targets,
210 // the typical path is to have separate builds for NEON and non-NEON devices
211 // (at least this is true for iOS and Android). Therefore, the NEON support is
212 // toggled by a build flag: define STBI_NEON to get NEON loops.
213 //
214 // If for some reason you do not want to use any of SIMD code, or if
215 // you have issues compiling it, you can disable it entirely by
216 // defining STBI_NO_SIMD.
217 //
218 // ===========================================================================
219 //
220 // HDR image support (disable by defining STBI_NO_HDR)
221 //
222 // stb_image now supports loading HDR images in general, and currently
223 // the Radiance .HDR file format, although the support is provided
224 // generically. You can still load any file through the existing interface;
225 // if you attempt to load an HDR file, it will be automatically remapped to
226 // LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1;
227 // both of these constants can be reconfigured through this interface:
228 //
229 // stbi_hdr_to_ldr_gamma(2.2f);
230 // stbi_hdr_to_ldr_scale(1.0f);
231 //
232 // (note, do not use _inverse_ constants; stb_image will invert them
233 // appropriately).
234 //
235 // Additionally, there is a new, parallel interface for loading files as
236 // (linear) floats to preserve the full dynamic range:
237 //
238 // float *data = stbi_loadf(filename, &x, &y, &n, 0);
239 //
240 // If you load LDR images through this interface, those images will
241 // be promoted to floating point values, run through the inverse of
242 // constants corresponding to the above:
243 //
244 // stbi_ldr_to_hdr_scale(1.0f);
245 // stbi_ldr_to_hdr_gamma(2.2f);
246 //
247 // Finally, given a filename (or an open file or memory block--see header
248 // file for details) containing image data, you can query for the "most
249 // appropriate" interface to use (that is, whether the image is HDR or
250 // not), using:
251 //
252 // stbi_is_hdr(char *filename);
253 //
254 // ===========================================================================
255 //
256 // iPhone PNG support:
257 //
258 // By default we convert iphone-formatted PNGs back to RGB, even though
259 // they are internally encoded differently. You can disable this conversion
260 // by calling stbi_convert_iphone_png_to_rgb(0), in which case
261 // you will always just get the native iphone "format" through (which
262 // is BGR stored in RGB).
263 //
264 // Call stbi_set_unpremultiply_on_load(1) as well to force a divide per
265 // pixel to remove any premultiplied alpha *only* if the image file explicitly
266 // says there's premultiplied data (currently only happens in iPhone images,
267 // and only if iPhone convert-to-rgb processing is on).
268 //
269 // ===========================================================================
270 //
271 // ADDITIONAL CONFIGURATION
272 //
273 // - You can suppress implementation of any of the decoders to reduce
274 // your code footprint by #defining one or more of the following
275 // symbols before creating the implementation.
276 //
277 // STBI_NO_JPEG
278 // STBI_NO_PNG
279 // STBI_NO_BMP
280 // STBI_NO_PSD
281 // STBI_NO_TGA
282 // STBI_NO_GIF
283 // STBI_NO_HDR
284 // STBI_NO_PIC
285 // STBI_NO_PNM (.ppm and .pgm)
286 //
287 // - You can request *only* certain decoders and suppress all other ones
288 // (this will be more forward-compatible, as addition of new decoders
289 // doesn't require you to disable them explicitly):
290 //
291 // STBI_ONLY_JPEG
292 // STBI_ONLY_PNG
293 // STBI_ONLY_BMP
294 // STBI_ONLY_PSD
295 // STBI_ONLY_TGA
296 // STBI_ONLY_GIF
297 // STBI_ONLY_HDR
298 // STBI_ONLY_PIC
299 // STBI_ONLY_PNM (.ppm and .pgm)
300 //
301 // - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still
302 // want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB
303 //
304 
305 
306 #ifndef STBI_NO_STDIO
307 #include <stdio.h>
308 #endif // STBI_NO_STDIO
309 
310 #define STBI_VERSION 1
311 
// Channel-count constants. Apart from STBI_default, the value of each
// enumerator equals the number of interleaved components per pixel.
enum
{
   STBI_default    = 0, // only used for desired_channels

   STBI_grey       = 1, // grey
   STBI_grey_alpha = 2, // grey, alpha
   STBI_rgb        = 3, // red, green, blue
   STBI_rgb_alpha  = 4  // red, green, blue, alpha
};
321 
322 typedef unsigned char stbi_uc;
323 typedef unsigned short stbi_us;
324 
325 #ifdef __cplusplus
326 extern "C" {
327 #endif
328 
329 #ifdef STB_IMAGE_STATIC
330 #define STBIDEF static
331 #else
332 #define STBIDEF extern
333 #endif
334 
336 //
337 // PRIMARY API - works on images of any type
338 //
339 
340 //
341 // load image by filename, open file, or memory buffer
342 //
343 
// Table of I/O callbacks for reading image data from an arbitrary source.
// Each callback takes an opaque 'user' pointer as its first argument.
typedef struct
{
   // fill 'data' with 'size' bytes. return number of bytes actually read
   int  (*read)(void *user, char *data, int size);

   // skip the next 'n' bytes, or 'unget' the last -n bytes if negative
   void (*skip)(void *user, int n);

   // returns nonzero if we are at end of file/data
   int  (*eof)(void *user);
} stbi_io_callbacks;
350 
352 //
353 // 8-bits-per-channel interface
354 //
355 
356 STBIDEF stbi_uc *stbi_load_from_memory (stbi_uc const *buffer, int len , int *x, int *y, int *channels_in_file, int desired_channels);
357 STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk , void *user, int *x, int *y, int *channels_in_file, int desired_channels);
358 #ifndef STBI_NO_GIF
359 STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp);
360 #endif
361 
362 
363 #ifndef STBI_NO_STDIO
364 STBIDEF stbi_uc *stbi_load (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
365 STBIDEF stbi_uc *stbi_load_from_file (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
366 // for stbi_load_from_file, file pointer is left pointing immediately after image
367 #endif
368 
370 //
371 // 16-bits-per-channel interface
372 //
373 
374 STBIDEF stbi_us *stbi_load_16_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
375 STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels);
376 
377 #ifndef STBI_NO_STDIO
378 STBIDEF stbi_us *stbi_load_16 (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
379 STBIDEF stbi_us *stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
380 #endif
381 
383 //
384 // float-per-channel interface
385 //
386 #ifndef STBI_NO_LINEAR
387  STBIDEF float *stbi_loadf_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
388  STBIDEF float *stbi_loadf_from_callbacks (stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels);
389 
390  #ifndef STBI_NO_STDIO
391  STBIDEF float *stbi_loadf (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
392  STBIDEF float *stbi_loadf_from_file (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
393  #endif
394 #endif
395 
396 #ifndef STBI_NO_HDR
397  STBIDEF void stbi_hdr_to_ldr_gamma(float gamma);
398  STBIDEF void stbi_hdr_to_ldr_scale(float scale);
399 #endif // STBI_NO_HDR
400 
401 #ifndef STBI_NO_LINEAR
402  STBIDEF void stbi_ldr_to_hdr_gamma(float gamma);
403  STBIDEF void stbi_ldr_to_hdr_scale(float scale);
404 #endif // STBI_NO_LINEAR
405 
406 // stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR
407 STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user);
408 STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len);
409 #ifndef STBI_NO_STDIO
410 STBIDEF int stbi_is_hdr (char const *filename);
411 STBIDEF int stbi_is_hdr_from_file(FILE *f);
412 #endif // STBI_NO_STDIO
413 
414 
415 // get a VERY brief reason for failure
416 // NOT THREADSAFE
417 STBIDEF const char *stbi_failure_reason (void);
418 
419 // free the loaded image -- this is just free()
420 STBIDEF void stbi_image_free (void *retval_from_stbi_load);
421 
422 // get image dimensions & components without fully decoding
423 STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, float *gamma);
424 STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, float *gamma);
425 STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len);
426 STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *clbk, void *user);
427 
428 #ifndef STBI_NO_STDIO
429 STBIDEF int stbi_info (char const *filename, int *x, int *y, int *comp, float *gamma);
430 STBIDEF int stbi_info_from_file (FILE *f, int *x, int *y, int *comp, float *gamma);
431 STBIDEF int stbi_is_16_bit (char const *filename);
432 STBIDEF int stbi_is_16_bit_from_file(FILE *f);
433 #endif
434 
435 
436 
437 // for image formats that explicitly notate that they have premultiplied alpha,
438 // we just return the colors as stored in the file. set this flag to force
439 // unpremultiplication. results are undefined if the unpremultiply overflows.
440 STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply);
441 
442 // indicate whether we should process iphone images back to canonical format,
443 // or just pass them through "as-is"
444 STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert);
445 
446 // flip the image vertically, so the first pixel in the output array is the bottom left
447 STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip);
448 
449 // ZLIB client - used by PNG, available for other purposes
450 
451 STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen);
452 STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header);
453 STBIDEF char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen);
454 STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
455 
456 STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen);
457 STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
458 
459 
460 #ifdef __cplusplus
461 }
462 #endif
463 
464 //
465 //
467 #endif // STBI_INCLUDE_STB_IMAGE_H
468 
469 #ifdef STB_IMAGE_IMPLEMENTATION
470 
471 #if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) \
472  || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \
473  || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \
474  || defined(STBI_ONLY_ZLIB)
475  #ifndef STBI_ONLY_JPEG
476  #define STBI_NO_JPEG
477  #endif
478  #ifndef STBI_ONLY_PNG
479  #define STBI_NO_PNG
480  #endif
481  #ifndef STBI_ONLY_BMP
482  #define STBI_NO_BMP
483  #endif
484  #ifndef STBI_ONLY_PSD
485  #define STBI_NO_PSD
486  #endif
487  #ifndef STBI_ONLY_TGA
488  #define STBI_NO_TGA
489  #endif
490  #ifndef STBI_ONLY_GIF
491  #define STBI_NO_GIF
492  #endif
493  #ifndef STBI_ONLY_HDR
494  #define STBI_NO_HDR
495  #endif
496  #ifndef STBI_ONLY_PIC
497  #define STBI_NO_PIC
498  #endif
499  #ifndef STBI_ONLY_PNM
500  #define STBI_NO_PNM
501  #endif
502 #endif
503 
504 #if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB)
505 #define STBI_NO_ZLIB
506 #endif
507 
508 
509 #include <stdarg.h>
510 #include <stddef.h> // ptrdiff_t on osx
511 #include <stdlib.h>
512 #include <string.h>
513 #include <limits.h>
514 
515 #if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
516 #include <math.h> // ldexp, pow
517 #endif
518 
519 #ifndef STBI_NO_STDIO
520 #include <stdio.h>
521 #endif
522 
523 #ifndef STBI_ASSERT
524 #include <assert.h>
525 #define STBI_ASSERT(x) assert(x)
526 #endif
527 
528 
529 #ifndef _MSC_VER
530  #ifdef __cplusplus
531  #define stbi_inline inline
532  #else
533  #define stbi_inline
534  #endif
535 #else
536  #define stbi_inline __forceinline
537 #endif
538 
539 
540 #ifdef _MSC_VER
541 typedef unsigned short stbi__uint16;
542 typedef signed short stbi__int16;
543 typedef unsigned int stbi__uint32;
544 typedef signed int stbi__int32;
545 #else
546 #include <stdint.h>
547 typedef uint16_t stbi__uint16;
548 typedef int16_t stbi__int16;
549 typedef uint32_t stbi__uint32;
550 typedef int32_t stbi__int32;
551 #endif
552 
553 // should produce compiler error if size is wrong
554 typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1];
555 
556 #ifdef _MSC_VER
557 #define STBI_NOTUSED(v) (void)(v)
558 #else
559 #define STBI_NOTUSED(v) (void)sizeof(v)
560 #endif
561 
562 #ifdef _MSC_VER
563 #define STBI_HAS_LROTL
564 #endif
565 
// stbi_lrot(x,y): rotate x left by y bits.
// The fallback expression is only a correct rotate when x is an unsigned
// 32-bit value and 0 <= y <= 31. Note that 'y' is evaluated twice.
#ifdef STBI_HAS_LROTL
   #define stbi_lrot(x,y)  _lrotl(x,y)
#else
   // The right-shift count is masked so that y == 0 shifts by 0 instead of 32;
   // shifting a 32-bit value by its full width is undefined behavior in C.
   #define stbi_lrot(x,y)  (((x) << (y)) | ((x) >> ((32 - (y)) & 31)))
#endif
571 
572 #if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED))
573 // ok
574 #elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED)
575 // ok
576 #else
577 #error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)."
578 #endif
579 
580 #ifndef STBI_MALLOC
581 #define STBI_MALLOC(sz) malloc(sz)
582 #define STBI_REALLOC(p,newsz) realloc(p,newsz)
583 #define STBI_FREE(p) free(p)
584 #endif
585 
586 #ifndef STBI_REALLOC_SIZED
587 #define STBI_REALLOC_SIZED(p,oldsz,newsz) STBI_REALLOC(p,newsz)
588 #endif
589 
590 // x86/x64 detection
591 #if defined(__x86_64__) || defined(_M_X64)
592 #define STBI__X64_TARGET
593 #elif defined(__i386) || defined(_M_IX86)
594 #define STBI__X86_TARGET
595 #endif
596 
597 #if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD)
598 // gcc doesn't support sse2 intrinsics unless you compile with -msse2,
599 // which in turn means it gets to use SSE2 everywhere. This is unfortunate,
600 // but previous attempts to provide the SSE2 functions with runtime
601 // detection caused numerous issues. The way architecture extensions are
602 // exposed in GCC/Clang is, sadly, not really suited for one-file libs.
603 // New behavior: if compiled with -msse2, we use SSE2 without any
604 // detection; if not, we don't use it at all.
605 #define STBI_NO_SIMD
606 #endif
607 
608 #if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD)
609 // Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET
610 //
611 // 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the
612 // Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant.
613 // As a result, enabling SSE2 on 32-bit MinGW is dangerous when not
614 // simultaneously enabling "-mstackrealign".
615 //
616 // See https://github.com/nothings/stb/issues/81 for more information.
617 //
618 // So default to no SSE2 on 32-bit MinGW. If you've read this far and added
619 // -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2.
620 #define STBI_NO_SIMD
621 #endif
622 
623 #if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET))
624 #define STBI_SSE2
625 #include <emmintrin.h>
626 
627 #ifdef _MSC_VER
628 
629 #if _MSC_VER >= 1400 // not VC6
630 #include <intrin.h> // __cpuid
// Runs CPUID with function id 1 through the compiler intrinsic and returns
// the fourth output word (EDX in the inline-asm variant of this function).
static int stbi__cpuid3(void)
{
   int regs[4];
   __cpuid(regs, 1);
   return regs[3];
}
637 #else
638 static int stbi__cpuid3(void)
639 {
640  int res;
641  __asm {
642  mov eax,1
643  cpuid
644  mov res,edx
645  }
646  return res;
647 }
648 #endif
649 
650 #define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
651 
// Run-time SSE2 check for VC++ builds: returns 1 if bit 26 of the word
// returned by stbi__cpuid3() is set, 0 otherwise.
static int stbi__sse2_available(void)
{
   return (stbi__cpuid3() >> 26) & 1;
}
657 #else // assume GCC-style if not VC++
658 #define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
659 
// SSE2 check for GCC-style compilers: always reports SSE2 as available.
static int stbi__sse2_available(void)
{
   // Reaching this code on GCC/Clang implies the build was made with
   // -msse2. The compiler may therefore already emit SSE2 instructions
   // anywhere it likes, so no run-time test is needed before we do too.
   const int available = 1;
   return available;
}
667 #endif
668 #endif
669 
670 // ARM NEON
671 #if defined(STBI_NO_SIMD) && defined(STBI_NEON)
672 #undef STBI_NEON
673 #endif
674 
675 #ifdef STBI_NEON
676 #include <arm_neon.h>
677 // assume GCC or Clang on ARM targets
678 #define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
679 #endif
680 
681 #ifndef STBI_SIMD_ALIGN
682 #define STBI_SIMD_ALIGN(type, name) type name
683 #endif
684 
686 //
687 // stbi__context struct and start_xxx functions
688 
689 // stbi__context structure is our basic context used by all images, so it
690 // contains all the IO context, plus some basic image information
691 typedef struct
692 {
693  stbi__uint32 img_x, img_y;
694  int img_n, img_out_n;
695 
696  stbi_io_callbacks io;
697  void *io_user_data;
698 
699  int read_from_callbacks;
700  int buflen;
701  stbi_uc buffer_start[128];
702 
703  stbi_uc *img_buffer, *img_buffer_end;
704  stbi_uc *img_buffer_original, *img_buffer_original_end;
705 } stbi__context;
706 
707 
708 static void stbi__refill_buffer(stbi__context *s);
709 
710 // initialize a memory-decode context
711 static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len)
712 {
713  s->io.read = NULL;
714  s->read_from_callbacks = 0;
715  s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer;
716  s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *) buffer+len;
717 }
718 
719 // initialize a callback-based context
720 static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user)
721 {
722  s->io = *c;
723  s->io_user_data = user;
724  s->buflen = sizeof(s->buffer_start);
725  s->read_from_callbacks = 1;
726  s->img_buffer_original = s->buffer_start;
727  stbi__refill_buffer(s);
728  s->img_buffer_original_end = s->img_buffer_end;
729 }
730 
731 #ifndef STBI_NO_STDIO
732 
// 'read' callback for FILE-backed contexts: 'user' is the FILE*.
// Reads up to 'size' bytes into 'data' and returns the count actually read.
static int stbi__stdio_read(void *user, char *data, int size)
{
   FILE *stream = (FILE *) user;
   size_t got = fread(data, 1, size, stream);
   return (int) got;
}
737 
// 'skip' callback for FILE-backed contexts: 'user' is the FILE*.
// Moves the stream position by 'n' bytes relative to the current position
// (negative 'n' moves backwards).
//
// A successful fseek() clears the end-of-file indicator and never sets it,
// even when the new position is past the end of the file. The 'eof' callback
// would then keep reporting "not at end" after skipping beyond the data.
// So one byte is probed after the seek: at end of file the probe sets the
// EOF indicator, otherwise the byte is pushed back and the position is
// unchanged.
//
// The result of fseek() is not reported because this callback returns void.
static void stbi__stdio_skip(void *user, int n)
{
   FILE *f = (FILE *) user;
   int ch;

   fseek(f, n, SEEK_CUR);

   ch = fgetc(f);     // read a byte so feof() reflects the new position
   if (ch != EOF)
      ungetc(ch, f);  // valid byte: push it back onto the stream
}
742 
// 'eof' callback for FILE-backed contexts: 'user' is the FILE*.
// Returns nonzero when no more data can be read from the stream.
//
// A stream in the error state is treated as being at its end too. Once a
// read has failed, feof() alone can stay zero forever, and a caller that
// loops until 'eof' would never terminate.
static int stbi__stdio_eof(void *user)
{
   FILE *f = (FILE *) user;
   return feof(f) || ferror(f);
}
747 
748 static stbi_io_callbacks stbi__stdio_callbacks =
749 {
750  stbi__stdio_read,
751  stbi__stdio_skip,
752  stbi__stdio_eof,
753 };
754 
755 static void stbi__start_file(stbi__context *s, FILE *f)
756 {
757  stbi__start_callbacks(s, &stbi__stdio_callbacks, (void *) f);
758 }
759 
760 //static void stop_file(stbi__context *s) { }
761 
762 #endif // !STBI_NO_STDIO
763 
764 static void stbi__rewind(stbi__context *s)
765 {
766  // conceptually rewind SHOULD rewind to the beginning of the stream,
767  // but we just rewind to the beginning of the initial buffer, because
768  // we only use it after doing 'test', which only ever looks at at most 92 bytes
769  s->img_buffer = s->img_buffer_original;
770  s->img_buffer_end = s->img_buffer_original_end;
771 }
772 
// Channel-order tags (values are the default 0, 1 numbering, spelled out).
enum
{
   STBI_ORDER_RGB = 0,
   STBI_ORDER_BGR = 1
};
778 
// Description of a decoded result; a pointer to this struct is passed to
// each format's *_load function.
typedef struct
{
   int bits_per_channel; // bits per channel of the result
   int num_channels;     // number of channels in the result
   int channel_order;    // presumably one of the STBI_ORDER_* values -- confirm in the loaders
} stbi__result_info;
785 
786 #ifndef STBI_NO_JPEG
787 static int stbi__jpeg_test(stbi__context *s);
788 static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
789 static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp);
790 #endif
791 
792 #ifndef STBI_NO_PNG
793 static int stbi__png_test(stbi__context *s);
794 static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
795 static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp, float *gamma);
796 static int stbi__png_is16(stbi__context *s);
797 #endif
798 
799 #ifndef STBI_NO_BMP
800 static int stbi__bmp_test(stbi__context *s);
801 static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
802 static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp);
803 #endif
804 
805 #ifndef STBI_NO_TGA
806 static int stbi__tga_test(stbi__context *s);
807 static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
808 static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp);
809 #endif
810 
811 #ifndef STBI_NO_PSD
812 static int stbi__psd_test(stbi__context *s);
813 static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc);
814 static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp);
815 static int stbi__psd_is16(stbi__context *s);
816 #endif
817 
818 #ifndef STBI_NO_HDR
819 static int stbi__hdr_test(stbi__context *s);
820 static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
821 static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp);
822 #endif
823 
824 #ifndef STBI_NO_PIC
825 static int stbi__pic_test(stbi__context *s);
826 static void *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
827 static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp);
828 #endif
829 
830 #ifndef STBI_NO_GIF
831 static int stbi__gif_test(stbi__context *s);
832 static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
833 static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp);
834 static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp);
835 #endif
836 
837 #ifndef STBI_NO_PNM
838 static int stbi__pnm_test(stbi__context *s);
839 static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
840 static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp);
841 #endif
842 
843 // this is not threadsafe
844 static const char *stbi__g_failure_reason;
845 
846 STBIDEF const char *stbi_failure_reason(void)
847 {
848  return stbi__g_failure_reason;
849 }
850 
851 static int stbi__err(const char *str)
852 {
853  stbi__g_failure_reason = str;
854  return 0;
855 }
856 
// Allocates 'size' bytes through the STBI_MALLOC hook and returns its result.
static void *stbi__malloc(size_t size)
{
   void *mem = STBI_MALLOC(size);
   return mem;
}
861 
862 // stb_image uses ints pervasively, including for offset calculations.
863 // therefore the largest decoded image size we can support with the
864 // current code, even on 64-bit targets, is INT_MAX. this is not a
865 // significant limitation for the intended use case.
866 //
867 // we do, however, need to make sure our size calculations don't
868 // overflow. hence a few helper functions for size calculations that
869 // multiply integers together, making sure that they're non-negative
870 // and no overflow occurs.
871 
// Returns 1 if "a + b" can be computed in an int without overflow, 0 otherwise.
// Negative terms are considered invalid: a negative 'a' or 'b' yields 0.
static int stbi__addsizes_valid(int a, int b)
{
   if (a < 0 || b < 0) return 0;
   // now 0 <= b <= INT_MAX, hence also 0 <= INT_MAX - b <= INT_MAX.
   // "a + b <= INT_MAX" (which might overflow) is the same as
   // "a <= INT_MAX - b" (which cannot overflow).
   return a <= INT_MAX - b;
}
883 
// Returns 1 if "a * b" can be computed in an int without overflow, 0 otherwise.
// Negative factors are considered invalid.
static int stbi__mul2sizes_valid(int a, int b)
{
   if (a >= 0 && b >= 0) {
      // multiplying by zero can never overflow (and must not divide by it)
      if (b == 0)
         return 1;
      // portable overflow test: a*b <= INT_MAX  <=>  a <= INT_MAX/b
      return a <= INT_MAX / b;
   }
   return 0;
}
893 
// Returns 1 if "a*b + add" has no negative terms/factors and doesn't overflow.
// The product is only evaluated after it has been shown to fit in an int.
static int stbi__mad2sizes_valid(int a, int b, int add)
{
   if (!stbi__mul2sizes_valid(a, b))
      return 0;
   return stbi__addsizes_valid(a*b, add) != 0;
}
899 
// Returns 1 if "a*b*c + add" has no negative terms/factors and doesn't overflow.
// Each partial product is validated before it is used in the next step.
static int stbi__mad3sizes_valid(int a, int b, int c, int add)
{
   if (!stbi__mul2sizes_valid(a, b))
      return 0;
   if (!stbi__mul2sizes_valid(a*b, c))
      return 0;
   return stbi__addsizes_valid(a*b*c, add) != 0;
}
906 
// Returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow.
// Only compiled when the linear or HDR paths are enabled.
#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add)
{
   if (!stbi__mul2sizes_valid(a, b))
      return 0;
   if (!stbi__mul2sizes_valid(a*b, c))
      return 0;
   if (!stbi__mul2sizes_valid(a*b*c, d))
      return 0;
   return stbi__addsizes_valid(a*b*c*d, add) != 0;
}
#endif
915 
// mallocs with size overflow checking

// Allocates "a*b + add" bytes, or returns NULL if that size is not valid
// according to stbi__mad2sizes_valid.
static void *stbi__malloc_mad2(int a, int b, int add)
{
   if (stbi__mad2sizes_valid(a, b, add))
      return stbi__malloc(a*b + add);
   return NULL;
}
922 
// Allocates "a*b*c + add" bytes, or returns NULL if that size is not valid
// according to stbi__mad3sizes_valid.
static void *stbi__malloc_mad3(int a, int b, int c, int add)
{
   if (stbi__mad3sizes_valid(a, b, c, add))
      return stbi__malloc(a*b*c + add);
   return NULL;
}
928 
#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
// Allocates "a*b*c*d + add" bytes, or returns NULL if that size is not valid
// according to stbi__mad4sizes_valid.
static void *stbi__malloc_mad4(int a, int b, int c, int d, int add)
{
   if (stbi__mad4sizes_valid(a, b, c, d, add))
      return stbi__malloc(a*b*c*d + add);
   return NULL;
}
#endif
936 
// stbi__err - error
// stbi__errpf - error returning pointer to float
// stbi__errpuc - error returning pointer to unsigned char
//
// From here on stbi__err is a two-argument macro (x, y). Depending on the
// configuration it records nothing (STBI_NO_FAILURE_STRINGS), records y
// (STBI_FAILURE_USERMSG), or records x (default) through the stbi__err
// function. The pointer variants always evaluate to NULL.

#if defined(STBI_NO_FAILURE_STRINGS)
   #define stbi__err(x,y)  0
#elif defined(STBI_FAILURE_USERMSG)
   #define stbi__err(x,y)  stbi__err(y)
#else
   #define stbi__err(x,y)  stbi__err(x)
#endif

#define stbi__errpf(x,y)   ((float *)(size_t) (stbi__err(x,y)?NULL:NULL))
#define stbi__errpuc(x,y)  ((unsigned char *)(size_t) (stbi__err(x,y)?NULL:NULL))
951 
952 STBIDEF void stbi_image_free(void *retval_from_stbi_load)
953 {
954  STBI_FREE(retval_from_stbi_load);
955 }
956 
957 #ifndef STBI_NO_LINEAR
958 static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp);
959 #endif
960 
961 #ifndef STBI_NO_HDR
962 static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp);
963 #endif
964 
965 static int stbi__vertically_flip_on_load = 0;
966 
967 STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip)
968 {
969  stbi__vertically_flip_on_load = flag_true_if_should_flip;
970 }
971 
972 static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
973 {
974  memset(ri, 0, sizeof(*ri)); // make sure it's initialized if we add new fields
975  ri->bits_per_channel = 8; // default is 8 so most paths don't have to be changed
976  ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order
977  ri->num_channels = 0;
978 
979  #ifndef STBI_NO_JPEG
980  if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri);
981  #endif
982  #ifndef STBI_NO_PNG
983  if (stbi__png_test(s)) return stbi__png_load(s,x,y,comp,req_comp, ri);
984  #endif
985  #ifndef STBI_NO_BMP
986  if (stbi__bmp_test(s)) return stbi__bmp_load(s,x,y,comp,req_comp, ri);
987  #endif
988  #ifndef STBI_NO_GIF
989  if (stbi__gif_test(s)) return stbi__gif_load(s,x,y,comp,req_comp, ri);
990  #endif
991  #ifndef STBI_NO_PSD
992  if (stbi__psd_test(s)) return stbi__psd_load(s,x,y,comp,req_comp, ri, bpc);
993  #endif
994  #ifndef STBI_NO_PIC
995  if (stbi__pic_test(s)) return stbi__pic_load(s,x,y,comp,req_comp, ri);
996  #endif
997  #ifndef STBI_NO_PNM
998  if (stbi__pnm_test(s)) return stbi__pnm_load(s,x,y,comp,req_comp, ri);
999  #endif
1000 
1001  #ifndef STBI_NO_HDR
1002  if (stbi__hdr_test(s)) {
1003  float *hdr = stbi__hdr_load(s, x,y,comp,req_comp, ri);
1004  return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
1005  }
1006  #endif
1007 
1008  #ifndef STBI_NO_TGA
1009  // test tga last because it's a crappy test!
1010  if (stbi__tga_test(s))
1011  return stbi__tga_load(s,x,y,comp,req_comp, ri);
1012  #endif
1013 
1014  return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt");
1015 }
1016 
1017 static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int channels)
1018 {
1019  int i;
1020  int img_len = w * h * channels;
1021  stbi_uc *reduced;
1022 
1023  reduced = (stbi_uc *) stbi__malloc(img_len);
1024  if (reduced == NULL) return stbi__errpuc("outofmem", "Out of memory");
1025 
1026  for (i = 0; i < img_len; ++i)
1027  reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is sufficient approx of 16->8 bit scaling
1028 
1029  STBI_FREE(orig);
1030  return reduced;
1031 }
1032 
1033 static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int channels)
1034 {
1035  int i;
1036  int img_len = w * h * channels;
1037  stbi__uint16 *enlarged;
1038 
1039  enlarged = (stbi__uint16 *) stbi__malloc(img_len*2);
1040  if (enlarged == NULL) return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
1041 
1042  for (i = 0; i < img_len; ++i)
1043  enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff
1044 
1045  STBI_FREE(orig);
1046  return enlarged;
1047 }
1048 
1049 static void stbi__vertical_flip(void *image, int w, int h, int bytes_per_pixel)
1050 {
1051  int row;
1052  size_t bytes_per_row = (size_t)w * bytes_per_pixel;
1053  stbi_uc temp[2048];
1054  stbi_uc *bytes = (stbi_uc *)image;
1055 
1056  for (row = 0; row < (h>>1); row++) {
1057  stbi_uc *row0 = bytes + row*bytes_per_row;
1058  stbi_uc *row1 = bytes + (h - row - 1)*bytes_per_row;
1059  // swap row0 with row1
1060  size_t bytes_left = bytes_per_row;
1061  while (bytes_left) {
1062  size_t bytes_copy = (bytes_left < sizeof(temp)) ? bytes_left : sizeof(temp);
1063  memcpy(temp, row0, bytes_copy);
1064  memcpy(row0, row1, bytes_copy);
1065  memcpy(row1, temp, bytes_copy);
1066  row0 += bytes_copy;
1067  row1 += bytes_copy;
1068  bytes_left -= bytes_copy;
1069  }
1070  }
1071 }
1072 
1073 static void stbi__vertical_flip_slices(void *image, int w, int h, int z, int bytes_per_pixel)
1074 {
1075  int slice;
1076  int slice_size = w * h * bytes_per_pixel;
1077 
1078  stbi_uc *bytes = (stbi_uc *)image;
1079  for (slice = 0; slice < z; ++slice) {
1080  stbi__vertical_flip(bytes, w, h, bytes_per_pixel);
1081  bytes += slice_size;
1082  }
1083 }
1084 
1085 static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
1086 {
1087  stbi__result_info ri;
1088  void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8);
1089 
1090  if (result == NULL)
1091  return NULL;
1092 
1093  if (ri.bits_per_channel != 8) {
1094  STBI_ASSERT(ri.bits_per_channel == 16);
1095  result = stbi__convert_16_to_8((stbi__uint16 *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
1096  ri.bits_per_channel = 8;
1097  }
1098 
1099  // @TODO: move stbi__convert_format to here
1100 
1101  if (stbi__vertically_flip_on_load) {
1102  int channels = req_comp ? req_comp : *comp;
1103  stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc));
1104  }
1105 
1106  return (unsigned char *) result;
1107 }
1108 
1109 static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
1110 {
1111  stbi__result_info ri;
1112  void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16);
1113 
1114  if (result == NULL)
1115  return NULL;
1116 
1117  if (ri.bits_per_channel != 16) {
1118  STBI_ASSERT(ri.bits_per_channel == 8);
1119  result = stbi__convert_8_to_16((stbi_uc *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
1120  ri.bits_per_channel = 16;
1121  }
1122 
1123  // @TODO: move stbi__convert_format16 to here
1124  // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision
1125 
1126  if (stbi__vertically_flip_on_load) {
1127  int channels = req_comp ? req_comp : *comp;
1128  stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16));
1129  }
1130 
1131  return (stbi__uint16 *) result;
1132 }
1133 
1134 #if !defined(STBI_NO_HDR) || !defined(STBI_NO_LINEAR)
1135 static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp)
1136 {
1137  if (stbi__vertically_flip_on_load && result != NULL) {
1138  int channels = req_comp ? req_comp : *comp;
1139  stbi__vertical_flip(result, *x, *y, channels * sizeof(float));
1140  }
1141 }
1142 #endif
1143 
1144 #ifndef STBI_NO_STDIO
1145 
// Opens 'filename' with 'mode'. Uses fopen_s on MSVC 2005 and later, plain
// fopen everywhere else. Returns a null FILE pointer when the open fails.
static FILE *stbi__fopen(char const *filename, char const *mode)
{
#if defined(_MSC_VER) && _MSC_VER >= 1400
   FILE *f;
   if (fopen_s(&f, filename, mode) != 0)
      return 0;
   return f;
#else
   return fopen(filename, mode);
#endif
}
1157 
1158 
1159 STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp)
1160 {
1161  FILE *f = stbi__fopen(filename, "rb");
1162  unsigned char *result;
1163  if (!f) return stbi__errpuc("can't fopen", "Unable to open file");
1164  result = stbi_load_from_file(f,x,y,comp,req_comp);
1165  fclose(f);
1166  return result;
1167 }
1168 
1169 STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
1170 {
1171  unsigned char *result;
1172  stbi__context s;
1173  stbi__start_file(&s,f);
1174  result = stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
1175  if (result) {
1176  // need to 'unget' all the characters in the IO buffer
1177  fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
1178  }
1179  return result;
1180 }
1181 
1182 STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, int req_comp)
1183 {
1184  stbi__uint16 *result;
1185  stbi__context s;
1186  stbi__start_file(&s,f);
1187  result = stbi__load_and_postprocess_16bit(&s,x,y,comp,req_comp);
1188  if (result) {
1189  // need to 'unget' all the characters in the IO buffer
1190  fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
1191  }
1192  return result;
1193 }
1194 
1195 STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *comp, int req_comp)
1196 {
1197  FILE *f = stbi__fopen(filename, "rb");
1198  stbi__uint16 *result;
1199  if (!f) return (stbi_us *) stbi__errpuc("can't fopen", "Unable to open file");
1200  result = stbi_load_from_file_16(f,x,y,comp,req_comp);
1201  fclose(f);
1202  return result;
1203 }
1204 
1205 
1206 #endif
1207 
1208 STBIDEF stbi_us *stbi_load_16_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels)
1209 {
1210  stbi__context s;
1211  stbi__start_mem(&s,buffer,len);
1212  return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels);
1213 }
1214 
1215 STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels)
1216 {
1217  stbi__context s;
1218  stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user);
1219  return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels);
1220 }
1221 
1222 STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
1223 {
1224  stbi__context s;
1225  stbi__start_mem(&s,buffer,len);
1226  return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
1227 }
1228 
1229 STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
1230 {
1231  stbi__context s;
1232  stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
1233  return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
1234 }
1235 
1236 #ifndef STBI_NO_GIF
1237 STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp)
1238 {
1239  unsigned char *result;
1240  stbi__context s;
1241  stbi__start_mem(&s,buffer,len);
1242 
1243  result = (unsigned char*) stbi__load_gif_main(&s, delays, x, y, z, comp, req_comp);
1244  if (stbi__vertically_flip_on_load) {
1245  stbi__vertical_flip_slices( result, *x, *y, *z, *comp );
1246  }
1247 
1248  return result;
1249 }
1250 #endif
1251 
1252 #ifndef STBI_NO_LINEAR
1253 static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp)
1254 {
1255  unsigned char *data;
1256  #ifndef STBI_NO_HDR
1257  if (stbi__hdr_test(s)) {
1258  stbi__result_info ri;
1259  float *hdr_data = stbi__hdr_load(s,x,y,comp,req_comp, &ri);
1260  if (hdr_data)
1261  stbi__float_postprocess(hdr_data,x,y,comp,req_comp);
1262  return hdr_data;
1263  }
1264  #endif
1265  data = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp);
1266  if (data)
1267  return stbi__ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp);
1268  return stbi__errpf("unknown image type", "Image not of any known type, or corrupt");
1269 }
1270 
1271 STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
1272 {
1273  stbi__context s;
1274  stbi__start_mem(&s,buffer,len);
1275  return stbi__loadf_main(&s,x,y,comp,req_comp);
1276 }
1277 
1278 STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
1279 {
1280  stbi__context s;
1281  stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
1282  return stbi__loadf_main(&s,x,y,comp,req_comp);
1283 }
1284 
1285 #ifndef STBI_NO_STDIO
1286 STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp)
1287 {
1288  float *result;
1289  FILE *f = stbi__fopen(filename, "rb");
1290  if (!f) return stbi__errpf("can't fopen", "Unable to open file");
1291  result = stbi_loadf_from_file(f,x,y,comp,req_comp);
1292  fclose(f);
1293  return result;
1294 }
1295 
1296 STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
1297 {
1298  stbi__context s;
1299  stbi__start_file(&s,f);
1300  return stbi__loadf_main(&s,x,y,comp,req_comp);
1301 }
1302 #endif // !STBI_NO_STDIO
1303 
1304 #endif // !STBI_NO_LINEAR
1305 
// these is-hdr-or-not functions are defined independently of whether
// STBI_NO_LINEAR is defined, for API simplicity; if STBI_NO_HDR is defined,
// they always report false!
1309 
1310 STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)
1311 {
1312  #ifndef STBI_NO_HDR
1313  stbi__context s;
1314  stbi__start_mem(&s,buffer,len);
1315  return stbi__hdr_test(&s);
1316  #else
1317  STBI_NOTUSED(buffer);
1318  STBI_NOTUSED(len);
1319  return 0;
1320  #endif
1321 }
1322 
1323 #ifndef STBI_NO_STDIO
1324 STBIDEF int stbi_is_hdr (char const *filename)
1325 {
1326  FILE *f = stbi__fopen(filename, "rb");
1327  int result=0;
1328  if (f) {
1329  result = stbi_is_hdr_from_file(f);
1330  fclose(f);
1331  }
1332  return result;
1333 }
1334 
1335 STBIDEF int stbi_is_hdr_from_file(FILE *f)
1336 {
1337  #ifndef STBI_NO_HDR
1338  long pos = ftell(f);
1339  int res;
1340  stbi__context s;
1341  stbi__start_file(&s,f);
1342  res = stbi__hdr_test(&s);
1343  fseek(f, pos, SEEK_SET);
1344  return res;
1345  #else
1346  STBI_NOTUSED(f);
1347  return 0;
1348  #endif
1349 }
1350 #endif // !STBI_NO_STDIO
1351 
1352 STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user)
1353 {
1354  #ifndef STBI_NO_HDR
1355  stbi__context s;
1356  stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
1357  return stbi__hdr_test(&s);
1358  #else
1359  STBI_NOTUSED(clbk);
1360  STBI_NOTUSED(user);
1361  return 0;
1362  #endif
1363 }
1364 
1365 #ifndef STBI_NO_LINEAR
1366 static float stbi__l2h_gamma=2.2f, stbi__l2h_scale=1.0f;
1367 
1368 STBIDEF void stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; }
1369 STBIDEF void stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; }
1370 #endif
1371 
1372 static float stbi__h2l_gamma_i=1.0f/2.2f, stbi__h2l_scale_i=1.0f;
1373 
1374 STBIDEF void stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1/gamma; }
1375 STBIDEF void stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1/scale; }
1376 
1377 
1379 //
1380 // Common code used by all image loaders
1381 //
1382 
// Scan modes; presumably passed to the per-format parsers to select how far
// to parse (TODO confirm at the call sites, which are outside this section).
enum
{
   STBI__SCAN_load   = 0,
   STBI__SCAN_type   = 1,
   STBI__SCAN_header = 2
};
1389 
1390 static void stbi__refill_buffer(stbi__context *s)
1391 {
1392  int n = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen);
1393  if (n == 0) {
1394  // at end of file, treat same as if from memory, but need to handle case
1395  // where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file
1396  s->read_from_callbacks = 0;
1397  s->img_buffer = s->buffer_start;
1398  s->img_buffer_end = s->buffer_start+1;
1399  *s->img_buffer = 0;
1400  } else {
1401  s->img_buffer = s->buffer_start;
1402  s->img_buffer_end = s->buffer_start + n;
1403  }
1404 }
1405 
1406 stbi_inline static stbi_uc stbi__get8(stbi__context *s)
1407 {
1408  if (s->img_buffer < s->img_buffer_end)
1409  return *s->img_buffer++;
1410  if (s->read_from_callbacks) {
1411  stbi__refill_buffer(s);
1412  return *s->img_buffer++;
1413  }
1414  return 0;
1415 }
1416 
1417 stbi_inline static int stbi__at_eof(stbi__context *s)
1418 {
1419  if (s->io.read) {
1420  if (!(s->io.eof)(s->io_user_data)) return 0;
1421  // if feof() is true, check if buffer = end
1422  // special case: we've only got the special 0 character at the end
1423  if (s->read_from_callbacks == 0) return 1;
1424  }
1425 
1426  return s->img_buffer >= s->img_buffer_end;
1427 }
1428 
1429 static void stbi__skip(stbi__context *s, int n)
1430 {
1431  if (n < 0) {
1432  s->img_buffer = s->img_buffer_end;
1433  return;
1434  }
1435  if (s->io.read) {
1436  int blen = (int) (s->img_buffer_end - s->img_buffer);
1437  if (blen < n) {
1438  s->img_buffer = s->img_buffer_end;
1439  (s->io.skip)(s->io_user_data, n - blen);
1440  return;
1441  }
1442  }
1443  s->img_buffer += n;
1444 }
1445 
1446 static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n)
1447 {
1448  if (s->io.read) {
1449  int blen = (int) (s->img_buffer_end - s->img_buffer);
1450  if (blen < n) {
1451  int res, count;
1452 
1453  memcpy(buffer, s->img_buffer, blen);
1454 
1455  count = (s->io.read)(s->io_user_data, (char*) buffer + blen, n - blen);
1456  res = (count == (n-blen));
1457  s->img_buffer = s->img_buffer_end;
1458  return res;
1459  }
1460  }
1461 
1462  if (s->img_buffer+n <= s->img_buffer_end) {
1463  memcpy(buffer, s->img_buffer, n);
1464  s->img_buffer += n;
1465  return 1;
1466  } else
1467  return 0;
1468 }
1469 
1470 static int stbi__get16be(stbi__context *s)
1471 {
1472  int z = stbi__get8(s);
1473  return (z << 8) + stbi__get8(s);
1474 }
1475 
1476 static stbi__uint32 stbi__get32be(stbi__context *s)
1477 {
1478  stbi__uint32 z = stbi__get16be(s);
1479  return (z << 16) + stbi__get16be(s);
1480 }
1481 
1482 #if defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF)
1483 // nothing
1484 #else
1485 static int stbi__get16le(stbi__context *s)
1486 {
1487  int z = stbi__get8(s);
1488  return z + (stbi__get8(s) << 8);
1489 }
1490 #endif
1491 
1492 #ifndef STBI_NO_BMP
1493 static stbi__uint32 stbi__get32le(stbi__context *s)
1494 {
1495  stbi__uint32 z = stbi__get16le(s);
1496  return z + (stbi__get16le(s) << 16);
1497 }
1498 #endif
1499 
// truncate int to byte without warnings: keeps only the low 8 bits of x
#define STBI__BYTECAST(x)  ((stbi_uc) ((x) & 255))
1501 
1502 
1504 //
1505 // generic converter from built-in img_n to req_comp
1506 // individual types do this automatically as much as possible (e.g. jpeg
1507 // does all cases internally since it needs to colorspace convert anyway,
1508 // and it never has alpha, so very few cases ). png can automatically
1509 // interleave an alpha=255 channel, but falls back to this for other cases
1510 //
1511 // assume data buffer is malloced, so malloc a new one and free that one
1512 // only failure mode is malloc failing
1513 
1514 static stbi_uc stbi__compute_y(int r, int g, int b)
1515 {
1516  return (stbi_uc) (((r*77) + (g*150) + (29*b)) >> 8);
1517 }
1518 
// Converts an 8-bit image of x*y pixels from img_n to req_comp components.
// Returns 'data' unchanged when the counts already match. Otherwise a new
// buffer is allocated and filled, and 'data' is freed (on success and on
// failure alike). Returns NULL with "outofmem" if the allocation fails, or
// with "unsupported" if the img_n/req_comp pair has no conversion (instead
// of returning an uninitialized buffer when asserts are compiled out).
static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y)
{
   int i,j;
   unsigned char *good;

   if (req_comp == img_n) return data;
   STBI_ASSERT(req_comp >= 1 && req_comp <= 4);

   good = (unsigned char *) stbi__malloc_mad3(req_comp, x, y, 0);
   if (good == NULL) {
      STBI_FREE(data);
      return stbi__errpuc("outofmem", "Out of memory");
   }

   for (j=0; j < (int) y; ++j) {
      unsigned char *src  = data + j * x * img_n   ;
      unsigned char *dest = good + j * x * req_comp;

      #define STBI__COMBO(a,b)  ((a)*8+(b))
      #define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
      // convert source image with img_n components to one with req_comp components;
      // avoid switch per pixel, so use switch per scanline and massive macros
      switch (STBI__COMBO(img_n, req_comp)) {
         STBI__CASE(1,2) { dest[0]=src[0], dest[1]=255; } break;
         STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break;
         STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=255; } break;
         STBI__CASE(2,1) { dest[0]=src[0]; } break;
         STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break;
         STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; } break;
         STBI__CASE(3,4) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255; } break;
         STBI__CASE(3,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); } break;
         STBI__CASE(3,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = 255; } break;
         STBI__CASE(4,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); } break;
         STBI__CASE(4,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = src[3]; } break;
         STBI__CASE(4,3) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; } break;
         default:
            // no conversion exists for this pair: fail cleanly rather than
            // hand back a buffer that was never written
            STBI_ASSERT(0);
            STBI_FREE(data);
            STBI_FREE(good);
            return stbi__errpuc("unsupported", "Unsupported format conversion");
      }
      #undef STBI__CASE
   }

   STBI_FREE(data);
   return good;
}
1562 
1563 static stbi__uint16 stbi__compute_y_16(int r, int g, int b)
1564 {
1565  return (stbi__uint16) (((r*77) + (g*150) + (29*b)) >> 8);
1566 }
1567 
1568 static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int req_comp, unsigned int x, unsigned int y)
1569 {
1570  int i,j;
1571  stbi__uint16 *good;
1572 
1573  if (req_comp == img_n) return data;
1574  STBI_ASSERT(req_comp >= 1 && req_comp <= 4);
1575 
1576  good = (stbi__uint16 *) stbi__malloc(req_comp * x * y * 2);
1577  if (good == NULL) {
1578  STBI_FREE(data);
1579  return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
1580  }
1581 
1582  for (j=0; j < (int) y; ++j) {
1583  stbi__uint16 *src = data + j * x * img_n ;
1584  stbi__uint16 *dest = good + j * x * req_comp;
1585 
1586  #define STBI__COMBO(a,b) ((a)*8+(b))
1587  #define STBI__CASE(a,b) case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
1588  // convert source image with img_n components to one with req_comp components;
1589  // avoid switch per pixel, so use switch per scanline and massive macros
1590  switch (STBI__COMBO(img_n, req_comp)) {
1591  STBI__CASE(1,2) { dest[0]=src[0], dest[1]=0xffff; } break;
1592  STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break;
1593  STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=0xffff; } break;
1594  STBI__CASE(2,1) { dest[0]=src[0]; } break;
1595  STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break;
1596  STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; } break;
1597  STBI__CASE(3,4) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=0xffff; } break;
1598  STBI__CASE(3,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break;
1599  STBI__CASE(3,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = 0xffff; } break;
1600  STBI__CASE(4,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break;
1601  STBI__CASE(4,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = src[3]; } break;
1602  STBI__CASE(4,3) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; } break;
1603  default: STBI_ASSERT(0);
1604  }
1605  #undef STBI__CASE
1606  }
1607 
1608  STBI_FREE(data);
1609  return good;
1610 }
1611 
1612 #ifndef STBI_NO_LINEAR
1613 static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
1614 {
1615  int i,k,n;
1616  float *output;
1617  if (!data) return NULL;
1618  output = (float *) stbi__malloc_mad4(x, y, comp, sizeof(float), 0);
1619  if (output == NULL) { STBI_FREE(data); return stbi__errpf("outofmem", "Out of memory"); }
1620  // compute number of non-alpha components
1621  if (comp & 1) n = comp; else n = comp-1;
1622  for (i=0; i < x*y; ++i) {
1623  for (k=0; k < n; ++k) {
1624  output[i*comp + k] = (float) (pow(data[i*comp+k]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale);
1625  }
1626  if (k < comp) output[i*comp + k] = data[i*comp+k]/255.0f;
1627  }
1628  STBI_FREE(data);
1629  return output;
1630 }
1631 #endif
1632 
1633 #ifndef STBI_NO_HDR
1634 #define stbi__float2int(x) ((int) (x))
1635 static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp)
1636 {
1637  int i,k,n;
1638  stbi_uc *output;
1639  if (!data) return NULL;
1640  output = (stbi_uc *) stbi__malloc_mad3(x, y, comp, 0);
1641  if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); }
1642  // compute number of non-alpha components
1643  if (comp & 1) n = comp; else n = comp-1;
1644  for (i=0; i < x*y; ++i) {
1645  for (k=0; k < n; ++k) {
1646  float z = (float) pow(data[i*comp+k]*stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f;
1647  if (z < 0) z = 0;
1648  if (z > 255) z = 255;
1649  output[i*comp + k] = (stbi_uc) stbi__float2int(z);
1650  }
1651  if (k < comp) {
1652  float z = data[i*comp+k] * 255 + 0.5f;
1653  if (z < 0) z = 0;
1654  if (z > 255) z = 255;
1655  output[i*comp + k] = (stbi_uc) stbi__float2int(z);
1656  }
1657  }
1658  STBI_FREE(data);
1659  return output;
1660 }
1661 #endif
1662 
1664 //
1665 // "baseline" JPEG/JFIF decoder
1666 //
1667 // simple implementation
1668 // - doesn't support delayed output of y-dimension
1669 // - simple interface (only one output format: 8-bit interleaved RGB)
1670 // - doesn't try to recover corrupt jpegs
1671 // - doesn't allow partial loading, loading multiple at once
1672 // - still fast on x86 (copying globals into locals doesn't help x86)
1673 // - allocates lots of intermediate memory (full size of all components)
1674 // - non-interleaved case requires this anyway
1675 // - allows good upsampling (see next)
1676 // high-quality
1677 // - upsampled channels are bilinearly interpolated, even across blocks
1678 // - quality integer IDCT derived from IJG's 'slow'
1679 // performance
1680 // - fast huffman; reasonable integer IDCT
1681 // - some SIMD kernels for common paths on targets with SSE2/NEON
1682 // - uses a lot of intermediate memory, could cache poorly
1683 
1684 #ifndef STBI_NO_JPEG
1685 
1686 // huffman decoding acceleration
1687 #define FAST_BITS 9 // larger handles more cases; smaller stomps less cache
1688 
1689 typedef struct
1690 {
1691  stbi_uc fast[1 << FAST_BITS];
1692  // weirdly, repacking this into AoS is a 10% speed loss, instead of a win
1693  stbi__uint16 code[256];
1694  stbi_uc values[256];
1695  stbi_uc size[257];
1696  unsigned int maxcode[18];
1697  int delta[17]; // old 'firstsymbol' - old 'firstcode'
1698 } stbi__huffman;
1699 
1700 typedef struct
1701 {
1702  stbi__context *s;
1703  stbi__huffman huff_dc[4];
1704  stbi__huffman huff_ac[4];
1705  stbi__uint16 dequant[4][64];
1706  stbi__int16 fast_ac[4][1 << FAST_BITS];
1707 
1708 // sizes for components, interleaved MCUs
1709  int img_h_max, img_v_max;
1710  int img_mcu_x, img_mcu_y;
1711  int img_mcu_w, img_mcu_h;
1712 
1713 // definition of jpeg image component
1714  struct
1715  {
1716  int id;
1717  int h,v;
1718  int tq;
1719  int hd,ha;
1720  int dc_pred;
1721 
1722  int x,y,w2,h2;
1723  stbi_uc *data;
1724  void *raw_data, *raw_coeff;
1725  stbi_uc *linebuf;
1726  short *coeff; // progressive only
1727  int coeff_w, coeff_h; // number of 8x8 coefficient blocks
1728  } img_comp[4];
1729 
1730  stbi__uint32 code_buffer; // jpeg entropy-coded buffer
1731  int code_bits; // number of valid bits
1732  unsigned char marker; // marker seen while filling entropy buffer
1733  int nomore; // flag if we saw a marker so must stop
1734 
1735  int progressive;
1736  int spec_start;
1737  int spec_end;
1738  int succ_high;
1739  int succ_low;
1740  int eob_run;
1741  int jfif;
1742  int app14_color_transform; // Adobe APP14 tag
1743  int rgb;
1744 
1745  int scan_n, order[4];
1746  int restart_interval, todo;
1747 
1748 // kernels
1749  void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]);
1750  void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step);
1751  stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs);
1752 } stbi__jpeg;
1753 
1754 static int stbi__build_huffman(stbi__huffman *h, int *count)
1755 {
1756  int i,j,k=0;
1757  unsigned int code;
1758  // build size list for each symbol (from JPEG spec)
1759  for (i=0; i < 16; ++i)
1760  for (j=0; j < count[i]; ++j)
1761  h->size[k++] = (stbi_uc) (i+1);
1762  h->size[k] = 0;
1763 
1764  // compute actual symbols (from jpeg spec)
1765  code = 0;
1766  k = 0;
1767  for(j=1; j <= 16; ++j) {
1768  // compute delta to add to code to compute symbol id
1769  h->delta[j] = k - code;
1770  if (h->size[k] == j) {
1771  while (h->size[k] == j)
1772  h->code[k++] = (stbi__uint16) (code++);
1773  if (code-1 >= (1u << j)) return stbi__err("bad code lengths","Corrupt JPEG");
1774  }
1775  // compute largest code + 1 for this size, preshifted as needed later
1776  h->maxcode[j] = code << (16-j);
1777  code <<= 1;
1778  }
1779  h->maxcode[j] = 0xffffffff;
1780 
1781  // build non-spec acceleration table; 255 is flag for not-accelerated
1782  memset(h->fast, 255, 1 << FAST_BITS);
1783  for (i=0; i < k; ++i) {
1784  int s = h->size[i];
1785  if (s <= FAST_BITS) {
1786  int c = h->code[i] << (FAST_BITS-s);
1787  int m = 1 << (FAST_BITS-s);
1788  for (j=0; j < m; ++j) {
1789  h->fast[c+j] = (stbi_uc) i;
1790  }
1791  }
1792  }
1793  return 1;
1794 }
1795 
1796 // build a table that decodes both magnitude and value of small ACs in
1797 // one go.
1798 static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h)
1799 {
1800  int i;
1801  for (i=0; i < (1 << FAST_BITS); ++i) {
1802  stbi_uc fast = h->fast[i];
1803  fast_ac[i] = 0;
1804  if (fast < 255) {
1805  int rs = h->values[fast];
1806  int run = (rs >> 4) & 15;
1807  int magbits = rs & 15;
1808  int len = h->size[fast];
1809 
1810  if (magbits && len + magbits <= FAST_BITS) {
1811  // magnitude code followed by receive_extend code
1812  int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits);
1813  int m = 1 << (magbits - 1);
1814  if (k < m) k += (~0U << magbits) + 1;
1815  // if the result is small enough, we can fit it in fast_ac table
1816  if (k >= -128 && k <= 127)
1817  fast_ac[i] = (stbi__int16) ((k * 256) + (run * 16) + (len + magbits));
1818  }
1819  }
1820  }
1821 }
1822 
1823 static void stbi__grow_buffer_unsafe(stbi__jpeg *j)
1824 {
1825  do {
1826  unsigned int b = j->nomore ? 0 : stbi__get8(j->s);
1827  if (b == 0xff) {
1828  int c = stbi__get8(j->s);
1829  while (c == 0xff) c = stbi__get8(j->s); // consume fill bytes
1830  if (c != 0) {
1831  j->marker = (unsigned char) c;
1832  j->nomore = 1;
1833  return;
1834  }
1835  }
1836  j->code_buffer |= b << (24 - j->code_bits);
1837  j->code_bits += 8;
1838  } while (j->code_bits <= 24);
1839 }
1840 
1841 // (1 << n) - 1
1842 static const stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535};
1843 
1844 // decode a jpeg huffman value from the bitstream
1845 stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h)
1846 {
1847  unsigned int temp;
1848  int c,k;
1849 
1850  if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1851 
1852  // look at the top FAST_BITS and determine what symbol ID it is,
1853  // if the code is <= FAST_BITS
1854  c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
1855  k = h->fast[c];
1856  if (k < 255) {
1857  int s = h->size[k];
1858  if (s > j->code_bits)
1859  return -1;
1860  j->code_buffer <<= s;
1861  j->code_bits -= s;
1862  return h->values[k];
1863  }
1864 
1865  // naive test is to shift the code_buffer down so k bits are
1866  // valid, then test against maxcode. To speed this up, we've
1867  // preshifted maxcode left so that it has (16-k) 0s at the
1868  // end; in other words, regardless of the number of bits, it
1869  // wants to be compared against something shifted to have 16;
1870  // that way we don't need to shift inside the loop.
1871  temp = j->code_buffer >> 16;
1872  for (k=FAST_BITS+1 ; ; ++k)
1873  if (temp < h->maxcode[k])
1874  break;
1875  if (k == 17) {
1876  // error! code not found
1877  j->code_bits -= 16;
1878  return -1;
1879  }
1880 
1881  if (k > j->code_bits)
1882  return -1;
1883 
1884  // convert the huffman code to the symbol id
1885  c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k];
1886  STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]);
1887 
1888  // convert the id to a symbol
1889  j->code_bits -= k;
1890  j->code_buffer <<= k;
1891  return h->values[c];
1892 }
1893 
// stbi__jbias[n] = 1 - 2^n: the offset added to an n-bit received value whose
// leading bit is 0 to turn it into the corresponding negative number
static const int stbi__jbias[16] = {
   1 - (1 <<  0), 1 - (1 <<  1), 1 - (1 <<  2), 1 - (1 <<  3),
   1 - (1 <<  4), 1 - (1 <<  5), 1 - (1 <<  6), 1 - (1 <<  7),
   1 - (1 <<  8), 1 - (1 <<  9), 1 - (1 << 10), 1 - (1 << 11),
   1 - (1 << 12), 1 - (1 << 13), 1 - (1 << 14), 1 - (1 << 15)
};
1896 
1897 // combined JPEG 'receive' and JPEG 'extend', since baseline
1898 // always extends everything it receives.
1899 stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n)
1900 {
1901  unsigned int k;
1902  int sgn;
1903  if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
1904 
1905  sgn = (stbi__int32)j->code_buffer >> 31; // sign bit is always in MSB
1906  k = stbi_lrot(j->code_buffer, n);
1907  STBI_ASSERT(n >= 0 && n < (int) (sizeof(stbi__bmask)/sizeof(*stbi__bmask)));
1908  j->code_buffer = k & ~stbi__bmask[n];
1909  k &= stbi__bmask[n];
1910  j->code_bits -= n;
1911  return k + (stbi__jbias[n] & ~sgn);
1912 }
1913 
1914 // get some unsigned bits
1915 stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n)
1916 {
1917  unsigned int k;
1918  if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
1919  k = stbi_lrot(j->code_buffer, n);
1920  j->code_buffer = k & ~stbi__bmask[n];
1921  k &= stbi__bmask[n];
1922  j->code_bits -= n;
1923  return k;
1924 }
1925 
1926 stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j)
1927 {
1928  unsigned int k;
1929  if (j->code_bits < 1) stbi__grow_buffer_unsafe(j);
1930  k = j->code_buffer;
1931  j->code_buffer <<= 1;
1932  --j->code_bits;
1933  return k & 0x80000000;
1934 }
1935 
1936 // given a value that's at position X in the zigzag stream,
1937 // where does it appear in the 8x8 matrix coded as row-major?
1938 static const stbi_uc stbi__jpeg_dezigzag[64+15] =
1939 {
1940  0, 1, 8, 16, 9, 2, 3, 10,
1941  17, 24, 32, 25, 18, 11, 4, 5,
1942  12, 19, 26, 33, 40, 48, 41, 34,
1943  27, 20, 13, 6, 7, 14, 21, 28,
1944  35, 42, 49, 56, 57, 50, 43, 36,
1945  29, 22, 15, 23, 30, 37, 44, 51,
1946  58, 59, 52, 45, 38, 31, 39, 46,
1947  53, 60, 61, 54, 47, 55, 62, 63,
1948  // let corrupt input sample past end
1949  63, 63, 63, 63, 63, 63, 63, 63,
1950  63, 63, 63, 63, 63, 63, 63
1951 };
1952 
1953 // decode one 64-entry block--
1954 static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi__uint16 *dequant)
1955 {
1956  int diff,dc,k;
1957  int t;
1958 
1959  if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1960  t = stbi__jpeg_huff_decode(j, hdc);
1961  if (t < 0) return stbi__err("bad huffman code","Corrupt JPEG");
1962 
1963  // 0 all the ac values now so we can do it 32-bits at a time
1964  memset(data,0,64*sizeof(data[0]));
1965 
1966  diff = t ? stbi__extend_receive(j, t) : 0;
1967  dc = j->img_comp[b].dc_pred + diff;
1968  j->img_comp[b].dc_pred = dc;
1969  data[0] = (short) (dc * dequant[0]);
1970 
1971  // decode AC components, see JPEG spec
1972  k = 1;
1973  do {
1974  unsigned int zig;
1975  int c,r,s;
1976  if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1977  c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
1978  r = fac[c];
1979  if (r) { // fast-AC path
1980  k += (r >> 4) & 15; // run
1981  s = r & 15; // combined length
1982  j->code_buffer <<= s;
1983  j->code_bits -= s;
1984  // decode into unzigzag'd location
1985  zig = stbi__jpeg_dezigzag[k++];
1986  data[zig] = (short) ((r >> 8) * dequant[zig]);
1987  } else {
1988  int rs = stbi__jpeg_huff_decode(j, hac);
1989  if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
1990  s = rs & 15;
1991  r = rs >> 4;
1992  if (s == 0) {
1993  if (rs != 0xf0) break; // end block
1994  k += 16;
1995  } else {
1996  k += r;
1997  // decode into unzigzag'd location
1998  zig = stbi__jpeg_dezigzag[k++];
1999  data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]);
2000  }
2001  }
2002  } while (k < 64);
2003  return 1;
2004 }
2005 
2006 static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b)
2007 {
2008  int diff,dc;
2009  int t;
2010  if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
2011 
2012  if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
2013 
2014  if (j->succ_high == 0) {
2015  // first scan for DC coefficient, must be first
2016  memset(data,0,64*sizeof(data[0])); // 0 all the ac values now
2017  t = stbi__jpeg_huff_decode(j, hdc);
2018  diff = t ? stbi__extend_receive(j, t) : 0;
2019 
2020  dc = j->img_comp[b].dc_pred + diff;
2021  j->img_comp[b].dc_pred = dc;
2022  data[0] = (short) (dc << j->succ_low);
2023  } else {
2024  // refinement scan for DC coefficient
2025  if (stbi__jpeg_get_bit(j))
2026  data[0] += (short) (1 << j->succ_low);
2027  }
2028  return 1;
2029 }
2030 
2031 // @OPTIMIZE: store non-zigzagged during the decode passes,
2032 // and only de-zigzag when dequantizing
2033 static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac)
2034 {
2035  int k;
2036  if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
2037 
2038  if (j->succ_high == 0) {
2039  int shift = j->succ_low;
2040 
2041  if (j->eob_run) {
2042  --j->eob_run;
2043  return 1;
2044  }
2045 
2046  k = j->spec_start;
2047  do {
2048  unsigned int zig;
2049  int c,r,s;
2050  if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
2051  c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
2052  r = fac[c];
2053  if (r) { // fast-AC path
2054  k += (r >> 4) & 15; // run
2055  s = r & 15; // combined length
2056  j->code_buffer <<= s;
2057  j->code_bits -= s;
2058  zig = stbi__jpeg_dezigzag[k++];
2059  data[zig] = (short) ((r >> 8) << shift);
2060  } else {
2061  int rs = stbi__jpeg_huff_decode(j, hac);
2062  if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
2063  s = rs & 15;
2064  r = rs >> 4;
2065  if (s == 0) {
2066  if (r < 15) {
2067  j->eob_run = (1 << r);
2068  if (r)
2069  j->eob_run += stbi__jpeg_get_bits(j, r);
2070  --j->eob_run;
2071  break;
2072  }
2073  k += 16;
2074  } else {
2075  k += r;
2076  zig = stbi__jpeg_dezigzag[k++];
2077  data[zig] = (short) (stbi__extend_receive(j,s) << shift);
2078  }
2079  }
2080  } while (k <= j->spec_end);
2081  } else {
2082  // refinement scan for these AC coefficients
2083 
2084  short bit = (short) (1 << j->succ_low);
2085 
2086  if (j->eob_run) {
2087  --j->eob_run;
2088  for (k = j->spec_start; k <= j->spec_end; ++k) {
2089  short *p = &data[stbi__jpeg_dezigzag[k]];
2090  if (*p != 0)
2091  if (stbi__jpeg_get_bit(j))
2092  if ((*p & bit)==0) {
2093  if (*p > 0)
2094  *p += bit;
2095  else
2096  *p -= bit;
2097  }
2098  }
2099  } else {
2100  k = j->spec_start;
2101  do {
2102  int r,s;
2103  int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh
2104  if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
2105  s = rs & 15;
2106  r = rs >> 4;
2107  if (s == 0) {
2108  if (r < 15) {
2109  j->eob_run = (1 << r) - 1;
2110  if (r)
2111  j->eob_run += stbi__jpeg_get_bits(j, r);
2112  r = 64; // force end of block
2113  } else {
2114  // r=15 s=0 should write 16 0s, so we just do
2115  // a run of 15 0s and then write s (which is 0),
2116  // so we don't have to do anything special here
2117  }
2118  } else {
2119  if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG");
2120  // sign bit
2121  if (stbi__jpeg_get_bit(j))
2122  s = bit;
2123  else
2124  s = -bit;
2125  }
2126 
2127  // advance by r
2128  while (k <= j->spec_end) {
2129  short *p = &data[stbi__jpeg_dezigzag[k++]];
2130  if (*p != 0) {
2131  if (stbi__jpeg_get_bit(j))
2132  if ((*p & bit)==0) {
2133  if (*p > 0)
2134  *p += bit;
2135  else
2136  *p -= bit;
2137  }
2138  } else {
2139  if (r == 0) {
2140  *p = (short) s;
2141  break;
2142  }
2143  --r;
2144  }
2145  }
2146  } while (k <= j->spec_end);
2147  }
2148  }
2149  return 1;
2150 }
2151 
2152 // take a -128..127 value and stbi__clamp it and convert to 0..255
2153 stbi_inline static stbi_uc stbi__clamp(int x)
2154 {
2155  // trick to use a single test to catch both cases
2156  if ((unsigned int) x > 255) {
2157  if (x < 0) return 0;
2158  if (x > 255) return 255;
2159  }
2160  return (stbi_uc) x;
2161 }
2162 
// Fixed-point helpers for the integer IDCT: values carry 12 fractional bits
// (scale factor 4096).
// stbi__f2f(x): scale the floating-point constant x by 4096, add 0.5 and
// truncate to int.
2163 #define stbi__f2f(x) ((int) (((x) * 4096 + 0.5)))
// stbi__fsh(x): scale the integer expression x by 4096 so it can be combined
// with products of stbi__f2f constants.
2164 #define stbi__fsh(x) ((x) * 4096)
2165 
2166 // derived from jidctint -- DCT_ISLOW
// One 1-D 8-point inverse DCT over the inputs s0..s7.
// The expansion DECLARES the locals t0..t3, p1..p5 and x0..x3, so it can be
// used only once per scope and only where declarations are allowed.
// After the expansion x0..x3 hold the even-part terms (built from s0,s2,s4,s6)
// and t0..t3 hold the odd-part terms (built from s1,s3,s5,s7), all still scaled
// by 4096; the code using the macro forms the eight outputs as x_i +/- t_j and
// shifts the scale back out.
2167 #define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \
2168  int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \
2169  p2 = s2; \
2170  p3 = s6; \
2171  p1 = (p2+p3) * stbi__f2f(0.5411961f); \
2172  t2 = p1 + p3*stbi__f2f(-1.847759065f); \
2173  t3 = p1 + p2*stbi__f2f( 0.765366865f); \
2174  p2 = s0; \
2175  p3 = s4; \
2176  t0 = stbi__fsh(p2+p3); \
2177  t1 = stbi__fsh(p2-p3); \
2178  x0 = t0+t3; \
2179  x3 = t0-t3; \
2180  x1 = t1+t2; \
2181  x2 = t1-t2; \
2182  t0 = s7; \
2183  t1 = s5; \
2184  t2 = s3; \
2185  t3 = s1; \
2186  p3 = t0+t2; \
2187  p4 = t1+t3; \
2188  p1 = t0+t3; \
2189  p2 = t1+t2; \
2190  p5 = (p3+p4)*stbi__f2f( 1.175875602f); \
2191  t0 = t0*stbi__f2f( 0.298631336f); \
2192  t1 = t1*stbi__f2f( 2.053119869f); \
2193  t2 = t2*stbi__f2f( 3.072711026f); \
2194  t3 = t3*stbi__f2f( 1.501321110f); \
2195  p1 = p5 + p1*stbi__f2f(-0.899976223f); \
2196  p2 = p5 + p2*stbi__f2f(-2.562915447f); \
2197  p3 = p3*stbi__f2f(-1.961570560f); \
2198  p4 = p4*stbi__f2f(-0.390180644f); \
2199  t3 += p1+p4; \
2200  t2 += p2+p3; \
2201  t1 += p2+p4; \
2202  t0 += p1+p3;
2203 
2204 static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64])
2205 {
2206  int i,val[64],*v=val;
2207  stbi_uc *o;
2208  short *d = data;
2209 
2210  // columns
2211  for (i=0; i < 8; ++i,++d, ++v) {
2212  // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
2213  if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
2214  && d[40]==0 && d[48]==0 && d[56]==0) {
2215  // no shortcut 0 seconds
2216  // (1|2|3|4|5|6|7)==0 0 seconds
2217  // all separate -0.047 seconds
2218  // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds
2219  int dcterm = d[0]*4;
2220  v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
2221  } else {
2222  STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56])
2223  // constants scaled things up by 1<<12; let's bring them back
2224  // down, but keep 2 extra bits of precision
2225  x0 += 512; x1 += 512; x2 += 512; x3 += 512;
2226  v[ 0] = (x0+t3) >> 10;
2227  v[56] = (x0-t3) >> 10;
2228  v[ 8] = (x1+t2) >> 10;
2229  v[48] = (x1-t2) >> 10;
2230  v[16] = (x2+t1) >> 10;
2231  v[40] = (x2-t1) >> 10;
2232  v[24] = (x3+t0) >> 10;
2233  v[32] = (x3-t0) >> 10;
2234  }
2235  }
2236 
2237  for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) {
2238  // no fast case since the first 1D IDCT spread components out
2239  STBI__IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7])
2240  // constants scaled things up by 1<<12, plus we had 1<<2 from first
2241  // loop, plus horizontal and vertical each scale by sqrt(8) so together
2242  // we've got an extra 1<<3, so 1<<17 total we need to remove.
2243  // so we want to round that, which means adding 0.5 * 1<<17,
2244  // aka 65536. Also, we'll end up with -128 to 127 that we want
2245  // to encode as 0..255 by adding 128, so we'll add that before the shift
2246  x0 += 65536 + (128<<17);
2247  x1 += 65536 + (128<<17);
2248  x2 += 65536 + (128<<17);
2249  x3 += 65536 + (128<<17);
2250  // tried computing the shifts into temps, or'ing the temps to see
2251  // if any were out of range, but that was slower
2252  o[0] = stbi__clamp((x0+t3) >> 17);
2253  o[7] = stbi__clamp((x0-t3) >> 17);
2254  o[1] = stbi__clamp((x1+t2) >> 17);
2255  o[6] = stbi__clamp((x1-t2) >> 17);
2256  o[2] = stbi__clamp((x2+t1) >> 17);
2257  o[5] = stbi__clamp((x2-t1) >> 17);
2258  o[3] = stbi__clamp((x3+t0) >> 17);
2259  o[4] = stbi__clamp((x3-t0) >> 17);
2260  }
2261 }
2262 
2263 #ifdef STBI_SSE2
2264 // sse2 integer IDCT. not the fastest possible implementation but it
2265 // produces bit-identical results to the generic C version so it's
2266 // fully "transparent".
//
//   out:        destination; 8 rows of 8 bytes are stored, one 64-bit store per row
//   out_stride: distance in bytes between consecutive output rows
//   data:       the 64 input coefficients, read as eight vectors of 8 shorts with
//               the aligned load _mm_load_si128, so it must be 16-byte aligned
//
// Structure: column pass, 16-bit 8x8 transpose, row pass, pack to bytes,
// 8-bit 8x8 transpose, store. All helper macros are local to this function
// and are #undef'd before it ends.
2267 static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
2268 {
2269  // This is constructed to match our regular (generic) integer IDCT exactly.
2270  __m128i row0, row1, row2, row3, row4, row5, row6, row7;
2271  __m128i tmp;
2272 
2273  // dot product constant: even elems=x, odd elems=y
2274  #define dct_const(x,y) _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y))
2275 
2276  // out(0) = c0[even]*x + c0[odd]*y (c0, x, y 16-bit, out 32-bit)
2277  // out(1) = c1[even]*x + c1[odd]*y
 // (x and y are interleaved so that one _mm_madd_epi16 per constant vector
 // yields the 32-bit sums; results are declared as <name>_l / <name>_h)
2278  #define dct_rot(out0,out1, x,y,c0,c1) \
2279  __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \
2280  __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \
2281  __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \
2282  __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \
2283  __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \
2284  __m128i out1##_h = _mm_madd_epi16(c0##hi, c1)
2285 
2286  // out = in << 12 (in 16-bit, out 32-bit)
 // (unpacking with zero in the low half gives in << 16; the arithmetic
 // right shift by 4 then leaves in << 12 with the sign preserved)
2287  #define dct_widen(out, in) \
2288  __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \
2289  __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4)
2290 
2291  // wide add
2292  #define dct_wadd(out, a, b) \
2293  __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \
2294  __m128i out##_h = _mm_add_epi32(a##_h, b##_h)
2295 
2296  // wide sub
2297  #define dct_wsub(out, a, b) \
2298  __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \
2299  __m128i out##_h = _mm_sub_epi32(a##_h, b##_h)
2300 
2301  // butterfly a/b, add bias, then shift by "s" and pack
2302  #define dct_bfly32o(out0, out1, a,b,bias,s) \
2303  { \
2304  __m128i abiased_l = _mm_add_epi32(a##_l, bias); \
2305  __m128i abiased_h = _mm_add_epi32(a##_h, bias); \
2306  dct_wadd(sum, abiased, b); \
2307  dct_wsub(dif, abiased, b); \
2308  out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \
2309  out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \
2310  }
2311 
2312  // 8-bit interleave step (for transposes)
2313  #define dct_interleave8(a, b) \
2314  tmp = a; \
2315  a = _mm_unpacklo_epi8(a, b); \
2316  b = _mm_unpackhi_epi8(tmp, b)
2317 
2318  // 16-bit interleave step (for transposes)
2319  #define dct_interleave16(a, b) \
2320  tmp = a; \
2321  a = _mm_unpacklo_epi16(a, b); \
2322  b = _mm_unpackhi_epi16(tmp, b)
2323 
 // one full 1-D IDCT pass over row0..row7 (in place): reads the eight row
 // vectors, rounds with "bias" and shifts right by "shift"
2324  #define dct_pass(bias,shift) \
2325  { \
2326  /* even part */ \
2327  dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \
2328  __m128i sum04 = _mm_add_epi16(row0, row4); \
2329  __m128i dif04 = _mm_sub_epi16(row0, row4); \
2330  dct_widen(t0e, sum04); \
2331  dct_widen(t1e, dif04); \
2332  dct_wadd(x0, t0e, t3e); \
2333  dct_wsub(x3, t0e, t3e); \
2334  dct_wadd(x1, t1e, t2e); \
2335  dct_wsub(x2, t1e, t2e); \
2336  /* odd part */ \
2337  dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \
2338  dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \
2339  __m128i sum17 = _mm_add_epi16(row1, row7); \
2340  __m128i sum35 = _mm_add_epi16(row3, row5); \
2341  dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \
2342  dct_wadd(x4, y0o, y4o); \
2343  dct_wadd(x5, y1o, y5o); \
2344  dct_wadd(x6, y2o, y5o); \
2345  dct_wadd(x7, y3o, y4o); \
2346  dct_bfly32o(row0,row7, x0,x7,bias,shift); \
2347  dct_bfly32o(row1,row6, x1,x6,bias,shift); \
2348  dct_bfly32o(row2,row5, x2,x5,bias,shift); \
2349  dct_bfly32o(row3,row4, x3,x4,bias,shift); \
2350  }
2351 
 // rotation constants: the same fixed-point values the generic C IDCT uses,
 // pre-combined into (x,y) multiplier pairs for dct_rot
2352  __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f));
2353  __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f( 0.765366865f), stbi__f2f(0.5411961f));
2354  __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f));
2355  __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f));
2356  __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f( 0.298631336f), stbi__f2f(-1.961570560f));
2357  __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f( 3.072711026f));
2358  __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f( 2.053119869f), stbi__f2f(-0.390180644f));
2359  __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f( 1.501321110f));
2360 
2361  // rounding biases in column/row passes, see stbi__idct_block for explanation.
2362  __m128i bias_0 = _mm_set1_epi32(512);
2363  __m128i bias_1 = _mm_set1_epi32(65536 + (128<<17));
2364 
2365  // load
2366  row0 = _mm_load_si128((const __m128i *) (data + 0*8));
2367  row1 = _mm_load_si128((const __m128i *) (data + 1*8));
2368  row2 = _mm_load_si128((const __m128i *) (data + 2*8));
2369  row3 = _mm_load_si128((const __m128i *) (data + 3*8));
2370  row4 = _mm_load_si128((const __m128i *) (data + 4*8));
2371  row5 = _mm_load_si128((const __m128i *) (data + 5*8));
2372  row6 = _mm_load_si128((const __m128i *) (data + 6*8));
2373  row7 = _mm_load_si128((const __m128i *) (data + 7*8));
2374 
2375  // column pass
2376  dct_pass(bias_0, 10);
2377 
2378  {
2379  // 16bit 8x8 transpose pass 1
2380  dct_interleave16(row0, row4);
2381  dct_interleave16(row1, row5);
2382  dct_interleave16(row2, row6);
2383  dct_interleave16(row3, row7);
2384 
2385  // transpose pass 2
2386  dct_interleave16(row0, row2);
2387  dct_interleave16(row1, row3);
2388  dct_interleave16(row4, row6);
2389  dct_interleave16(row5, row7);
2390 
2391  // transpose pass 3
2392  dct_interleave16(row0, row1);
2393  dct_interleave16(row2, row3);
2394  dct_interleave16(row4, row5);
2395  dct_interleave16(row6, row7);
2396  }
2397 
2398  // row pass
2399  dct_pass(bias_1, 17);
2400 
2401  {
2402  // pack
 // (_mm_packus_epi16 saturates each 16-bit value to an unsigned byte)
2403  __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7
2404  __m128i p1 = _mm_packus_epi16(row2, row3);
2405  __m128i p2 = _mm_packus_epi16(row4, row5);
2406  __m128i p3 = _mm_packus_epi16(row6, row7);
2407 
2408  // 8bit 8x8 transpose pass 1
2409  dct_interleave8(p0, p2); // a0e0a1e1...
2410  dct_interleave8(p1, p3); // c0g0c1g1...
2411 
2412  // transpose pass 2
2413  dct_interleave8(p0, p1); // a0c0e0g0...
2414  dct_interleave8(p2, p3); // b0d0f0h0...
2415 
2416  // transpose pass 3
2417  dct_interleave8(p0, p2); // a0b0c0d0...
2418  dct_interleave8(p1, p3); // a4b4c4d4...
2419 
2420  // store
 // each vector holds two output rows; the 0x4e shuffle swaps the two 64-bit
 // halves so the upper row can also be written with a low-half store
2421  _mm_storel_epi64((__m128i *) out, p0); out += out_stride;
2422  _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride;
2423  _mm_storel_epi64((__m128i *) out, p2); out += out_stride;
2424  _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride;
2425  _mm_storel_epi64((__m128i *) out, p1); out += out_stride;
2426  _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride;
2427  _mm_storel_epi64((__m128i *) out, p3); out += out_stride;
2428  _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e));
2429  }
2430 
2431 #undef dct_const
2432 #undef dct_rot
2433 #undef dct_widen
2434 #undef dct_wadd
2435 #undef dct_wsub
2436 #undef dct_bfly32o
2437 #undef dct_interleave8
2438 #undef dct_interleave16
2439 #undef dct_pass
2440 }
2441 
2442 #endif // STBI_SSE2
2443 
2444 #ifdef STBI_NEON
2445 
2446 // NEON integer IDCT. should produce bit-identical
2447 // results to the generic C version.
//
//   out:        destination; 8 rows of 8 bytes are stored (one vst1_u8 per row)
//   out_stride: distance in bytes between consecutive output rows
//   data:       the 64 input coefficients, loaded as eight vectors of 8 shorts
//
// Structure: column pass, 16-bit 8x8 transpose, row pass, narrowing to bytes,
// 8-bit 8x8 transpose, store. All helper macros are local to this function
// and are #undef'd before it ends.
2448 static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
2449 {
2450  int16x8_t row0, row1, row2, row3, row4, row5, row6, row7;
2451 
 // the same fixed-point constants the generic C IDCT uses, one per vector
2452  int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f));
2453  int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f));
2454  int16x4_t rot0_2 = vdup_n_s16(stbi__f2f( 0.765366865f));
2455  int16x4_t rot1_0 = vdup_n_s16(stbi__f2f( 1.175875602f));
2456  int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f));
2457  int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f));
2458  int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f));
2459  int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f));
2460  int16x4_t rot3_0 = vdup_n_s16(stbi__f2f( 0.298631336f));
2461  int16x4_t rot3_1 = vdup_n_s16(stbi__f2f( 2.053119869f));
2462  int16x4_t rot3_2 = vdup_n_s16(stbi__f2f( 3.072711026f));
2463  int16x4_t rot3_3 = vdup_n_s16(stbi__f2f( 1.501321110f));
2464 
 // out = inq * coeff, widened to 32 bits (declares out_l / out_h)
2465 #define dct_long_mul(out, inq, coeff) \
2466  int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \
2467  int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff)
2468 
 // out = acc + inq * coeff, widened to 32 bits (declares out_l / out_h)
2469 #define dct_long_mac(out, acc, inq, coeff) \
2470  int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \
2471  int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff)
2472 
 // out = inq << 12, widened to 32 bits
2473 #define dct_widen(out, inq) \
2474  int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \
2475  int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12)
2476 
2477 // wide add
2478 #define dct_wadd(out, a, b) \
2479  int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \
2480  int32x4_t out##_h = vaddq_s32(a##_h, b##_h)
2481 
2482 // wide sub
2483 #define dct_wsub(out, a, b) \
2484  int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \
2485  int32x4_t out##_h = vsubq_s32(a##_h, b##_h)
2486 
2487 // butterfly a/b, then shift using "shiftop" by "s" and pack
2488 #define dct_bfly32o(out0,out1, a,b,shiftop,s) \
2489  { \
2490  dct_wadd(sum, a, b); \
2491  dct_wsub(dif, a, b); \
2492  out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \
2493  out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \
2494  }
2495 
 // one full 1-D IDCT pass over row0..row7 (in place); "shiftop"/"shift"
 // select the narrowing shift applied to the 32-bit results
2496 #define dct_pass(shiftop, shift) \
2497  { \
2498  /* even part */ \
2499  int16x8_t sum26 = vaddq_s16(row2, row6); \
2500  dct_long_mul(p1e, sum26, rot0_0); \
2501  dct_long_mac(t2e, p1e, row6, rot0_1); \
2502  dct_long_mac(t3e, p1e, row2, rot0_2); \
2503  int16x8_t sum04 = vaddq_s16(row0, row4); \
2504  int16x8_t dif04 = vsubq_s16(row0, row4); \
2505  dct_widen(t0e, sum04); \
2506  dct_widen(t1e, dif04); \
2507  dct_wadd(x0, t0e, t3e); \
2508  dct_wsub(x3, t0e, t3e); \
2509  dct_wadd(x1, t1e, t2e); \
2510  dct_wsub(x2, t1e, t2e); \
2511  /* odd part */ \
2512  int16x8_t sum15 = vaddq_s16(row1, row5); \
2513  int16x8_t sum17 = vaddq_s16(row1, row7); \
2514  int16x8_t sum35 = vaddq_s16(row3, row5); \
2515  int16x8_t sum37 = vaddq_s16(row3, row7); \
2516  int16x8_t sumodd = vaddq_s16(sum17, sum35); \
2517  dct_long_mul(p5o, sumodd, rot1_0); \
2518  dct_long_mac(p1o, p5o, sum17, rot1_1); \
2519  dct_long_mac(p2o, p5o, sum35, rot1_2); \
2520  dct_long_mul(p3o, sum37, rot2_0); \
2521  dct_long_mul(p4o, sum15, rot2_1); \
2522  dct_wadd(sump13o, p1o, p3o); \
2523  dct_wadd(sump24o, p2o, p4o); \
2524  dct_wadd(sump23o, p2o, p3o); \
2525  dct_wadd(sump14o, p1o, p4o); \
2526  dct_long_mac(x4, sump13o, row7, rot3_0); \
2527  dct_long_mac(x5, sump24o, row5, rot3_1); \
2528  dct_long_mac(x6, sump23o, row3, rot3_2); \
2529  dct_long_mac(x7, sump14o, row1, rot3_3); \
2530  dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \
2531  dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \
2532  dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \
2533  dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \
2534  }
2535 
2536  // load
2537  row0 = vld1q_s16(data + 0*8);
2538  row1 = vld1q_s16(data + 1*8);
2539  row2 = vld1q_s16(data + 2*8);
2540  row3 = vld1q_s16(data + 3*8);
2541  row4 = vld1q_s16(data + 4*8);
2542  row5 = vld1q_s16(data + 5*8);
2543  row6 = vld1q_s16(data + 6*8);
2544  row7 = vld1q_s16(data + 7*8);
2545 
2546  // add DC bias
 // (adds 1024 to lane 0 of row0 only, i.e. to the DC coefficient.
 // NOTE(review): presumably this folds the +128 output offset into the
 // transform input, 1024 = 128*8 -- confirm against the generic version)
2547  row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0));
2548 
2549  // column pass
2550  dct_pass(vrshrn_n_s32, 10);
2551 
2552  // 16bit 8x8 transpose
2553  {
2554 // these three map to a single VTRN.16, VTRN.32, and VSWP, respectively.
2555 // whether compilers actually get this is another story, sadly.
2556 #define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; }
2557 #define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); }
2558 #define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); }
2559 
2560  // pass 1
2561  dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6
2562  dct_trn16(row2, row3);
2563  dct_trn16(row4, row5);
2564  dct_trn16(row6, row7);
2565 
2566  // pass 2
2567  dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4
2568  dct_trn32(row1, row3);
2569  dct_trn32(row4, row6);
2570  dct_trn32(row5, row7);
2571 
2572  // pass 3
2573  dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0
2574  dct_trn64(row1, row5);
2575  dct_trn64(row2, row6);
2576  dct_trn64(row3, row7);
2577 
2578 #undef dct_trn16
2579 #undef dct_trn32
2580 #undef dct_trn64
2581  }
2582 
2583  // row pass
2584  // vrshrn_n_s32 only supports shifts up to 16, we need
2585  // 17. so do a non-rounding shift of 16 first then follow
2586  // up with a rounding shift by 1.
2587  dct_pass(vshrn_n_s32, 16);
2588 
2589  {
2590  // pack and round
 // (vqrshrun_n_s16 does the remaining rounding shift by 1 and saturates
 // each value to an unsigned byte)
2591  uint8x8_t p0 = vqrshrun_n_s16(row0, 1);
2592  uint8x8_t p1 = vqrshrun_n_s16(row1, 1);
2593  uint8x8_t p2 = vqrshrun_n_s16(row2, 1);
2594  uint8x8_t p3 = vqrshrun_n_s16(row3, 1);
2595  uint8x8_t p4 = vqrshrun_n_s16(row4, 1);
2596  uint8x8_t p5 = vqrshrun_n_s16(row5, 1);
2597  uint8x8_t p6 = vqrshrun_n_s16(row6, 1);
2598  uint8x8_t p7 = vqrshrun_n_s16(row7, 1);
2599 
2600  // again, these can translate into one instruction, but often don't.
2601 #define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; }
2602 #define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); }
2603 #define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); }
2604 
2605  // sadly can't use interleaved stores here since we only write
2606  // 8 bytes to each scan line!
2607 
2608  // 8x8 8-bit transpose pass 1
2609  dct_trn8_8(p0, p1);
2610  dct_trn8_8(p2, p3);
2611  dct_trn8_8(p4, p5);
2612  dct_trn8_8(p6, p7);
2613 
2614  // pass 2
2615  dct_trn8_16(p0, p2);
2616  dct_trn8_16(p1, p3);
2617  dct_trn8_16(p4, p6);
2618  dct_trn8_16(p5, p7);
2619 
2620  // pass 3
2621  dct_trn8_32(p0, p4);
2622  dct_trn8_32(p1, p5);
2623  dct_trn8_32(p2, p6);
2624  dct_trn8_32(p3, p7);
2625 
2626  // store
2627  vst1_u8(out, p0); out += out_stride;
2628  vst1_u8(out, p1); out += out_stride;
2629  vst1_u8(out, p2); out += out_stride;
2630  vst1_u8(out, p3); out += out_stride;
2631  vst1_u8(out, p4); out += out_stride;
2632  vst1_u8(out, p5); out += out_stride;
2633  vst1_u8(out, p6); out += out_stride;
2634  vst1_u8(out, p7);
2635 
2636 #undef dct_trn8_8
2637 #undef dct_trn8_16
2638 #undef dct_trn8_32
2639  }
2640 
2641 #undef dct_long_mul
2642 #undef dct_long_mac
2643 #undef dct_widen
2644 #undef dct_wadd
2645 #undef dct_wsub
2646 #undef dct_bfly32o
2647 #undef dct_pass
2648 }
2649 
2650 #endif // STBI_NEON
2651 
2652 #define STBI__MARKER_none 0xff
2653 // if there's a pending marker from the entropy stream, return that
2654 // otherwise, fetch from the stream and get a marker. if there's no
2655 // marker, return 0xff, which is never a valid marker value
2656 static stbi_uc stbi__get_marker(stbi__jpeg *j)
2657 {
2658  stbi_uc x;
2659  if (j->marker != STBI__MARKER_none) { x = j->marker; j->marker = STBI__MARKER_none; return x; }
2660  x = stbi__get8(j->s);
2661  if (x != 0xff) return STBI__MARKER_none;
2662  while (x == 0xff)
2663  x = stbi__get8(j->s); // consume repeated 0xff fill bytes
2664  return x;
2665 }
2666 
// in each scan, we'll have scan_n components, and the order
// of the components is specified by order[]

// true for the restart markers RST0..RST7 (0xd0..0xd7): clearing the low
// three bits of any of them gives 0xd0
#define STBI__RESTART(x) (((x) & ~7) == 0xd0)
2670 
2671 // after a restart interval, stbi__jpeg_reset the entropy decoder and
2672 // the dc prediction
2673 static void stbi__jpeg_reset(stbi__jpeg *j)
2674 {
2675  j->code_bits = 0;
2676  j->code_buffer = 0;
2677  j->nomore = 0;
2678  j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = j->img_comp[3].dc_pred = 0;
2679  j->marker = STBI__MARKER_none;
2680  j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;
2681  j->eob_run = 0;
2682  // no more than 1<<31 MCUs if no restart_interal? that's plenty safe,
2683  // since we don't even allow 1<<30 pixels
2684 }
2685 
2686 static int stbi__parse_entropy_coded_data(stbi__jpeg *z)
2687 {
2688  stbi__jpeg_reset(z);
2689  if (!z->progressive) {
2690  if (z->scan_n == 1) {
2691  int i,j;
2692  STBI_SIMD_ALIGN(short, data[64]);
2693  int n = z->order[0];
2694  // non-interleaved data, we just need to process one block at a time,
2695  // in trivial scanline order
2696  // number of blocks to do just depends on how many actual "pixels" this
2697  // component has, independent of interleaved MCU blocking and such
2698  int w = (z->img_comp[n].x+7) >> 3;
2699  int h = (z->img_comp[n].y+7) >> 3;
2700  for (j=0; j < h; ++j) {
2701  for (i=0; i < w; ++i) {
2702  int ha = z->img_comp[n].ha;
2703  if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
2704  z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data);
2705  // every data block is an MCU, so countdown the restart interval
2706  if (--z->todo <= 0) {
2707  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
2708  // if it's NOT a restart, then just bail, so we get corrupt data
2709  // rather than no data
2710  if (!STBI__RESTART(z->marker)) return 1;
2711  stbi__jpeg_reset(z);
2712  }
2713  }
2714  }
2715  return 1;
2716  } else { // interleaved
2717  int i,j,k,x,y;
2718  STBI_SIMD_ALIGN(short, data[64]);
2719  for (j=0; j < z->img_mcu_y; ++j) {
2720  for (i=0; i < z->img_mcu_x; ++i) {
2721  // scan an interleaved mcu... process scan_n components in order
2722  for (k=0; k < z->scan_n; ++k) {
2723  int n = z->order[k];
2724  // scan out an mcu's worth of this component; that's just determined
2725  // by the basic H and V specified for the component
2726  for (y=0; y < z->img_comp[n].v; ++y) {
2727  for (x=0; x < z->img_comp[n].h; ++x) {
2728  int x2 = (i*z->img_comp[n].h + x)*8;
2729  int y2 = (j*z->img_comp[n].v + y)*8;
2730  int ha = z->img_comp[n].ha;
2731  if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
2732  z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data);
2733  }
2734  }
2735  }
2736  // after all interleaved components, that's an interleaved MCU,
2737  // so now count down the restart interval
2738  if (--z->todo <= 0) {
2739  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
2740  if (!STBI__RESTART(z->marker)) return 1;
2741  stbi__jpeg_reset(z);
2742  }
2743  }
2744  }
2745  return 1;
2746  }
2747  } else {
2748  if (z->scan_n == 1) {
2749  int i,j;
2750  int n = z->order[0];
2751  // non-interleaved data, we just need to process one block at a time,
2752  // in trivial scanline order
2753  // number of blocks to do just depends on how many actual "pixels" this
2754  // component has, independent of interleaved MCU blocking and such
2755  int w = (z->img_comp[n].x+7) >> 3;
2756  int h = (z->img_comp[n].y+7) >> 3;
2757  for (j=0; j < h; ++j) {
2758  for (i=0; i < w; ++i) {
2759  short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
2760  if (z->spec_start == 0) {
2761  if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
2762  return 0;
2763  } else {
2764  int ha = z->img_comp[n].ha;
2765  if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha]))
2766  return 0;
2767  }
2768  // every data block is an MCU, so countdown the restart interval
2769  if (--z->todo <= 0) {
2770  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
2771  if (!STBI__RESTART(z->marker)) return 1;
2772  stbi__jpeg_reset(z);
2773  }
2774  }
2775  }
2776  return 1;
2777  } else { // interleaved
2778  int i,j,k,x,y;
2779  for (j=0; j < z->img_mcu_y; ++j) {
2780  for (i=0; i < z->img_mcu_x; ++i) {
2781  // scan an interleaved mcu... process scan_n components in order
2782  for (k=0; k < z->scan_n; ++k) {
2783  int n = z->order[k];
2784  // scan out an mcu's worth of this component; that's just determined
2785  // by the basic H and V specified for the component
2786  for (y=0; y < z->img_comp[n].v; ++y) {
2787  for (x=0; x < z->img_comp[n].h; ++x) {
2788  int x2 = (i*z->img_comp[n].h + x);
2789  int y2 = (j*z->img_comp[n].v + y);
2790  short *data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w);
2791  if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
2792  return 0;
2793  }
2794  }
2795  }
2796  // after all interleaved components, that's an interleaved MCU,
2797  // so now count down the restart interval
2798  if (--z->todo <= 0) {
2799  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
2800  if (!STBI__RESTART(z->marker)) return 1;
2801  stbi__jpeg_reset(z);
2802  }
2803  }
2804  }
2805  return 1;
2806  }
2807  }
2808 }
2809 
2810 static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant)
2811 {
2812  int i;
2813  for (i=0; i < 64; ++i)
2814  data[i] *= dequant[i];
2815 }
2816 
2817 static void stbi__jpeg_finish(stbi__jpeg *z)
2818 {
2819  if (z->progressive) {
2820  // dequantize and idct the data
2821  int i,j,n;
2822  for (n=0; n < z->s->img_n; ++n) {
2823  int w = (z->img_comp[n].x+7) >> 3;
2824  int h = (z->img_comp[n].y+7) >> 3;
2825  for (j=0; j < h; ++j) {
2826  for (i=0; i < w; ++i) {
2827  short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
2828  stbi__jpeg_dequantize(data, z->dequant[z->img_comp[n].tq]);
2829  z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data);
2830  }
2831  }
2832  }
2833  }
2834 }
2835 
2836 static int stbi__process_marker(stbi__jpeg *z, int m)
2837 {
2838  int L;
2839  switch (m) {
2840  case STBI__MARKER_none: // no marker found
2841  return stbi__err("expected marker","Corrupt JPEG");
2842 
2843  case 0xDD: // DRI - specify restart interval
2844  if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG");
2845  z->restart_interval = stbi__get16be(z->s);
2846  return 1;
2847 
2848  case 0xDB: // DQT - define quantization table
2849  L = stbi__get16be(z->s)-2;
2850  while (L > 0) {
2851  int q = stbi__get8(z->s);
2852  int p = q >> 4, sixteen = (p != 0);
2853  int t = q & 15,i;
2854  if (p != 0 && p != 1) return stbi__err("bad DQT type","Corrupt JPEG");
2855  if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG");
2856 
2857  for (i=0; i < 64; ++i)
2858  z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s));
2859  L -= (sixteen ? 129 : 65);
2860  }
2861  return L==0;
2862 
2863  case 0xC4: // DHT - define huffman table
2864  L = stbi__get16be(z->s)-2;
2865  while (L > 0) {
2866  stbi_uc *v;
2867  int sizes[16],i,n=0;
2868  int q = stbi__get8(z->s);
2869  int tc = q >> 4;
2870  int th = q & 15;
2871  if (tc > 1 || th > 3) return stbi__err("bad DHT header","Corrupt JPEG");
2872  for (i=0; i < 16; ++i) {
2873  sizes[i] = stbi__get8(z->s);
2874  n += sizes[i];
2875  }
2876  L -= 17;
2877  if (tc == 0) {
2878  if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0;
2879  v = z->huff_dc[th].values;
2880  } else {
2881  if (!stbi__build_huffman(z->huff_ac+th, sizes)) return 0;
2882  v = z->huff_ac[th].values;
2883  }
2884  for (i=0; i < n; ++i)
2885  v[i] = stbi__get8(z->s);
2886  if (tc != 0)
2887  stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th);
2888  L -= n;
2889  }
2890  return L==0;
2891  }
2892 
2893  // check for comment block or APP blocks
2894  if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
2895  L = stbi__get16be(z->s);
2896  if (L < 2) {
2897  if (m == 0xFE)
2898  return stbi__err("bad COM len","Corrupt JPEG");
2899  else
2900  return stbi__err("bad APP len","Corrupt JPEG");
2901  }
2902  L -= 2;
2903 
2904  if (m == 0xE0 && L >= 5) { // JFIF APP0 segment
2905  static const unsigned char tag[5] = {'J','F','I','F','\0'};
2906  int ok = 1;
2907  int i;
2908  for (i=0; i < 5; ++i)
2909  if (stbi__get8(z->s) != tag[i])
2910  ok = 0;
2911  L -= 5;
2912  if (ok)
2913  z->jfif = 1;
2914  } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment
2915  static const unsigned char tag[6] = {'A','d','o','b','e','\0'};
2916  int ok = 1;
2917  int i;
2918  for (i=0; i < 6; ++i)
2919  if (stbi__get8(z->s) != tag[i])
2920  ok = 0;
2921  L -= 6;
2922  if (ok) {
2923  stbi__get8(z->s); // version
2924  stbi__get16be(z->s); // flags0
2925  stbi__get16be(z->s); // flags1
2926  z->app14_color_transform = stbi__get8(z->s); // color transform
2927  L -= 6;
2928  }
2929  }
2930 
2931  stbi__skip(z->s, L);
2932  return 1;
2933  }
2934 
2935  return stbi__err("unknown marker","Corrupt JPEG");
2936 }
2937 
2938 // after we see SOS
2939 static int stbi__process_scan_header(stbi__jpeg *z)
2940 {
2941  int i;
2942  int Ls = stbi__get16be(z->s);
2943  z->scan_n = stbi__get8(z->s);
2944  if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n) return stbi__err("bad SOS component count","Corrupt JPEG");
2945  if (Ls != 6+2*z->scan_n) return stbi__err("bad SOS len","Corrupt JPEG");
2946  for (i=0; i < z->scan_n; ++i) {
2947  int id = stbi__get8(z->s), which;
2948  int q = stbi__get8(z->s);
2949  for (which = 0; which < z->s->img_n; ++which)
2950  if (z->img_comp[which].id == id)
2951  break;
2952  if (which == z->s->img_n) return 0; // no match
2953  z->img_comp[which].hd = q >> 4; if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff","Corrupt JPEG");
2954  z->img_comp[which].ha = q & 15; if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff","Corrupt JPEG");
2955  z->order[i] = which;
2956  }
2957 
2958  {
2959  int aa;
2960  z->spec_start = stbi__get8(z->s);
2961  z->spec_end = stbi__get8(z->s); // should be 63, but might be 0
2962  aa = stbi__get8(z->s);
2963  z->succ_high = (aa >> 4);
2964  z->succ_low = (aa & 15);
2965  if (z->progressive) {
2966  if (z->spec_start > 63 || z->spec_end > 63 || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13)
2967  return stbi__err("bad SOS", "Corrupt JPEG");
2968  } else {
2969  if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG");
2970  if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG");
2971  z->spec_end = 63;
2972  }
2973  }
2974 
2975  return 1;
2976 }
2977 
2978 static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why)
2979 {
2980  int i;
2981  for (i=0; i < ncomp; ++i) {
2982  if (z->img_comp[i].raw_data) {
2983  STBI_FREE(z->img_comp[i].raw_data);
2984  z->img_comp[i].raw_data = NULL;
2985  z->img_comp[i].data = NULL;
2986  }
2987  if (z->img_comp[i].raw_coeff) {
2988  STBI_FREE(z->img_comp[i].raw_coeff);
2989  z->img_comp[i].raw_coeff = 0;
2990  z->img_comp[i].coeff = 0;
2991  }
2992  if (z->img_comp[i].linebuf) {
2993  STBI_FREE(z->img_comp[i].linebuf);
2994  z->img_comp[i].linebuf = NULL;
2995  }
2996  }
2997  return why;
2998 }
2999 
3000 static int stbi__process_frame_header(stbi__jpeg *z, int scan)
3001 {
3002  stbi__context *s = z->s;
3003  int Lf,p,i,q, h_max=1,v_max=1,c;
3004  Lf = stbi__get16be(s); if (Lf < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG
3005  p = stbi__get8(s); if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline
3006  s->img_y = stbi__get16be(s); if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG
3007  s->img_x = stbi__get16be(s); if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires
3008  c = stbi__get8(s);
3009  if (c != 3 && c != 1 && c != 4) return stbi__err("bad component count","Corrupt JPEG");
3010  s->img_n = c;
3011  for (i=0; i < c; ++i) {
3012  z->img_comp[i].data = NULL;
3013  z->img_comp[i].linebuf = NULL;
3014  }
3015 
3016  if (Lf != 8+3*s->img_n) return stbi__err("bad SOF len","Corrupt JPEG");
3017 
3018  z->rgb = 0;
3019  for (i=0; i < s->img_n; ++i) {
3020  static const unsigned char rgb[3] = { 'R', 'G', 'B' };
3021  z->img_comp[i].id = stbi__get8(s);
3022  if (s->img_n == 3 && z->img_comp[i].id == rgb[i])
3023  ++z->rgb;
3024  q = stbi__get8(s);
3025  z->img_comp[i].h = (q >> 4); if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG");
3026  z->img_comp[i].v = q & 15; if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG");
3027  z->img_comp[i].tq = stbi__get8(s); if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG");
3028  }
3029 
3030  if (scan != STBI__SCAN_load) return 1;
3031 
3032  if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode");
3033 
3034  for (i=0; i < s->img_n; ++i) {
3035  if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h;
3036  if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v;
3037  }
3038 
3039  // compute interleaved mcu info
3040  z->img_h_max = h_max;
3041  z->img_v_max = v_max;
3042  z->img_mcu_w = h_max * 8;
3043  z->img_mcu_h = v_max * 8;
3044  // these sizes can't be more than 17 bits
3045  z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w;
3046  z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h;
3047 
3048  for (i=0; i < s->img_n; ++i) {
3049  // number of effective pixels (e.g. for non-interleaved MCU)
3050  z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max;
3051  z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max;
3052  // to simplify generation, we'll allocate enough memory to decode
3053  // the bogus oversized data from using interleaved MCUs and their
3054  // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
3055  // discard the extra data until colorspace conversion
3056  //
3057  // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier)
3058  // so these muls can't overflow with 32-bit ints (which we require)
3059  z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
3060  z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
3061  z->img_comp[i].coeff = 0;
3062  z->img_comp[i].raw_coeff = 0;
3063  z->img_comp[i].linebuf = NULL;
3064  z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15);
3065  if (z->img_comp[i].raw_data == NULL)
3066  return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
3067  // align blocks for idct using mmx/sse
3068  z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15);
3069  if (z->progressive) {
3070  // w2, h2 are multiples of 8 (see above)
3071  z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8;
3072  z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8;
3073  z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15);
3074  if (z->img_comp[i].raw_coeff == NULL)
3075  return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
3076  z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15);
3077  }
3078  }
3079 
3080  return 1;
3081 }
3082 
3083 // use comparisons since in some cases we handle more than one case (e.g. SOF)
3084 #define stbi__DNL(x) ((x) == 0xdc)
3085 #define stbi__SOI(x) ((x) == 0xd8)
3086 #define stbi__EOI(x) ((x) == 0xd9)
3087 #define stbi__SOF(x) ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2)
3088 #define stbi__SOS(x) ((x) == 0xda)
3089 
3090 #define stbi__SOF_progressive(x) ((x) == 0xc2)
3091 
3092 static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan)
3093 {
3094  int m;
3095  z->jfif = 0;
3096  z->app14_color_transform = -1; // valid values are 0,1,2
3097  z->marker = STBI__MARKER_none; // initialize cached marker to empty
3098  m = stbi__get_marker(z);
3099  if (!stbi__SOI(m)) return stbi__err("no SOI","Corrupt JPEG");
3100  if (scan == STBI__SCAN_type) return 1;
3101  m = stbi__get_marker(z);
3102  while (!stbi__SOF(m)) {
3103  if (!stbi__process_marker(z,m)) return 0;
3104  m = stbi__get_marker(z);
3105  while (m == STBI__MARKER_none) {
3106  // some files have extra padding after their blocks, so ok, we'll scan
3107  if (stbi__at_eof(z->s)) return stbi__err("no SOF", "Corrupt JPEG");
3108  m = stbi__get_marker(z);
3109  }
3110  }
3111  z->progressive = stbi__SOF_progressive(m);
3112  if (!stbi__process_frame_header(z, scan)) return 0;
3113  return 1;
3114 }
3115 
3116 // decode image to YCbCr format
3117 static int stbi__decode_jpeg_image(stbi__jpeg *j)
3118 {
3119  int m;
3120  for (m = 0; m < 4; m++) {
3121  j->img_comp[m].raw_data = NULL;
3122  j->img_comp[m].raw_coeff = NULL;
3123  }
3124  j->restart_interval = 0;
3125  if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0;
3126  m = stbi__get_marker(j);
3127  while (!stbi__EOI(m)) {
3128  if (stbi__SOS(m)) {
3129  if (!stbi__process_scan_header(j)) return 0;
3130  if (!stbi__parse_entropy_coded_data(j)) return 0;
3131  if (j->marker == STBI__MARKER_none ) {
3132  // handle 0s at the end of image data from IP Kamera 9060
3133  while (!stbi__at_eof(j->s)) {
3134  int x = stbi__get8(j->s);
3135  if (x == 255) {
3136  j->marker = stbi__get8(j->s);
3137  break;
3138  }
3139  }
3140  // if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0
3141  }
3142  } else if (stbi__DNL(m)) {
3143  int Ld = stbi__get16be(j->s);
3144  stbi__uint32 NL = stbi__get16be(j->s);
3145  if (Ld != 4) return stbi__err("bad DNL len", "Corrupt JPEG");
3146  if (NL != j->s->img_y) return stbi__err("bad DNL height", "Corrupt JPEG");
3147  } else {
3148  if (!stbi__process_marker(j, m)) return 0;
3149  }
3150  m = stbi__get_marker(j);
3151  }
3152  if (j->progressive)
3153  stbi__jpeg_finish(j);
3154  return 1;
3155 }
3156 
3157 // static jfif-centered resampling (across block boundaries)
3158 
3159 typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1,
3160  int w, int hs);
3161 
3162 #define stbi__div4(x) ((stbi_uc) ((x) >> 2))
3163 
3164 static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
3165 {
3166  STBI_NOTUSED(out);
3167  STBI_NOTUSED(in_far);
3168  STBI_NOTUSED(w);
3169  STBI_NOTUSED(hs);
3170  return in_near;
3171 }
3172 
3173 static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
3174 {
3175  // need to generate two samples vertically for every one in input
3176  int i;
3177  STBI_NOTUSED(hs);
3178  for (i=0; i < w; ++i)
3179  out[i] = stbi__div4(3*in_near[i] + in_far[i] + 2);
3180  return out;
3181 }
3182 
3183 static stbi_uc* stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
3184 {
3185  // need to generate two samples horizontally for every one in input
3186  int i;
3187  stbi_uc *input = in_near;
3188 
3189  if (w == 1) {
3190  // if only one sample, can't do any interpolation
3191  out[0] = out[1] = input[0];
3192  return out;
3193  }
3194 
3195  out[0] = input[0];
3196  out[1] = stbi__div4(input[0]*3 + input[1] + 2);
3197  for (i=1; i < w-1; ++i) {
3198  int n = 3*input[i]+2;
3199  out[i*2+0] = stbi__div4(n+input[i-1]);
3200  out[i*2+1] = stbi__div4(n+input[i+1]);
3201  }
3202  out[i*2+0] = stbi__div4(input[w-2]*3 + input[w-1] + 2);
3203  out[i*2+1] = input[w-1];
3204 
3205  STBI_NOTUSED(in_far);
3206  STBI_NOTUSED(hs);
3207 
3208  return out;
3209 }
3210 
3211 #define stbi__div16(x) ((stbi_uc) ((x) >> 4))
3212 
3213 static stbi_uc *stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
3214 {
3215  // need to generate 2x2 samples for every one in input
3216  int i,t0,t1;
3217  if (w == 1) {
3218  out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
3219  return out;
3220  }
3221 
3222  t1 = 3*in_near[0] + in_far[0];
3223  out[0] = stbi__div4(t1+2);
3224  for (i=1; i < w; ++i) {
3225  t0 = t1;
3226  t1 = 3*in_near[i]+in_far[i];
3227  out[i*2-1] = stbi__div16(3*t0 + t1 + 8);
3228  out[i*2 ] = stbi__div16(3*t1 + t0 + 8);
3229  }
3230  out[w*2-1] = stbi__div4(t1+2);
3231 
3232  STBI_NOTUSED(hs);
3233 
3234  return out;
3235 }
3236 
3237 #if defined(STBI_SSE2) || defined(STBI_NEON)
// SIMD (SSE2 or NEON) version of the 2x horizontal + vertical row upsampler.
//   out      receives 2*w samples (the last write is out[w*2-1])
//   in_near  input row weighted 3 in the vertical blend
//   in_far   input row weighted 1 in the vertical blend
//   w        number of input samples per row
//   hs       unused
// Returns out.
// Structure: w == 1 is handled separately; then groups of 8 input samples
// are processed with vector code; the remaining samples, including the
// right-hand boundary, are finished by the scalar loop at the end.
3238 static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
3239 {
3240  // need to generate 2x2 samples for every one in input
3241  int i=0,t0,t1;
3242 
3243  if (w == 1) {
3244  out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
3245  return out;
3246  }
3247 
3248  t1 = 3*in_near[0] + in_far[0];
3249  // process groups of 8 pixels for as long as we can.
3250  // note we can't handle the last pixel in a row in this loop
3251  // because we need to handle the filter boundary conditions.
// the bound (w-1) & ~7 also guarantees that index i+8, read below for the
// "next" neighbour, is at most w-1
3252  for (; i < ((w-1) & ~7); i += 8) {
3253 #if defined(STBI_SSE2)
3254  // load and perform the vertical filtering pass
3255  // this uses 3*x + y = 4*x + (y - x)
3256  __m128i zero = _mm_setzero_si128();
3257  __m128i farb = _mm_loadl_epi64((__m128i *) (in_far + i));
3258  __m128i nearb = _mm_loadl_epi64((__m128i *) (in_near + i));
3259  __m128i farw = _mm_unpacklo_epi8(farb, zero);
3260  __m128i nearw = _mm_unpacklo_epi8(nearb, zero);
3261  __m128i diff = _mm_sub_epi16(farw, nearw);
3262  __m128i nears = _mm_slli_epi16(nearw, 2);
3263  __m128i curr = _mm_add_epi16(nears, diff); // current row
3264 
3265  // horizontal filter works the same based on shifted vers of current
3266  // row. "prev" is current row shifted right by 1 pixel; we need to
3267  // insert the previous pixel value (from t1).
3268  // "next" is current row shifted left by 1 pixel, with first pixel
3269  // of next block of 8 pixels added in.
3270  __m128i prv0 = _mm_slli_si128(curr, 2);
3271  __m128i nxt0 = _mm_srli_si128(curr, 2);
3272  __m128i prev = _mm_insert_epi16(prv0, t1, 0);
3273  __m128i next = _mm_insert_epi16(nxt0, 3*in_near[i+8] + in_far[i+8], 7);
3274 
3275  // horizontal filter, polyphase implementation since it's convenient:
3276  // even pixels = 3*cur + prev = cur*4 + (prev - cur)
3277  // odd pixels = 3*cur + next = cur*4 + (next - cur)
3278  // note the shared term.
3279  __m128i bias = _mm_set1_epi16(8);
3280  __m128i curs = _mm_slli_epi16(curr, 2);
3281  __m128i prvd = _mm_sub_epi16(prev, curr);
3282  __m128i nxtd = _mm_sub_epi16(next, curr);
3283  __m128i curb = _mm_add_epi16(curs, bias);
3284  __m128i even = _mm_add_epi16(prvd, curb);
3285  __m128i odd = _mm_add_epi16(nxtd, curb);
3286 
3287  // interleave even and odd pixels, then undo scaling.
3288  __m128i int0 = _mm_unpacklo_epi16(even, odd);
3289  __m128i int1 = _mm_unpackhi_epi16(even, odd);
3290  __m128i de0 = _mm_srli_epi16(int0, 4);
3291  __m128i de1 = _mm_srli_epi16(int1, 4);
3292 
3293  // pack and write output
3294  __m128i outv = _mm_packus_epi16(de0, de1);
3295  _mm_storeu_si128((__m128i *) (out + i*2), outv);
3296 #elif defined(STBI_NEON)
3297  // load and perform the vertical filtering pass
3298  // this uses 3*x + y = 4*x + (y - x)
3299  uint8x8_t farb = vld1_u8(in_far + i);
3300  uint8x8_t nearb = vld1_u8(in_near + i);
3301  int16x8_t diff = vreinterpretq_s16_u16(vsubl_u8(farb, nearb));
3302  int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2));
3303  int16x8_t curr = vaddq_s16(nears, diff); // current row
3304 
3305  // horizontal filter works the same based on shifted vers of current
3306  // row. "prev" is current row shifted right by 1 pixel; we need to
3307  // insert the previous pixel value (from t1).
3308  // "next" is current row shifted left by 1 pixel, with first pixel
3309  // of next block of 8 pixels added in.
3310  int16x8_t prv0 = vextq_s16(curr, curr, 7);
3311  int16x8_t nxt0 = vextq_s16(curr, curr, 1);
3312  int16x8_t prev = vsetq_lane_s16(t1, prv0, 0);
3313  int16x8_t next = vsetq_lane_s16(3*in_near[i+8] + in_far[i+8], nxt0, 7);
3314 
3315  // horizontal filter, polyphase implementation since it's convenient:
3316  // even pixels = 3*cur + prev = cur*4 + (prev - cur)
3317  // odd pixels = 3*cur + next = cur*4 + (next - cur)
3318  // note the shared term.
3319  int16x8_t curs = vshlq_n_s16(curr, 2);
3320  int16x8_t prvd = vsubq_s16(prev, curr);
3321  int16x8_t nxtd = vsubq_s16(next, curr);
3322  int16x8_t even = vaddq_s16(curs, prvd);
3323  int16x8_t odd = vaddq_s16(curs, nxtd);
3324 
3325  // undo scaling and round, then store with even/odd phases interleaved
3326  uint8x8x2_t o;
3327  o.val[0] = vqrshrun_n_s16(even, 4);
3328  o.val[1] = vqrshrun_n_s16(odd, 4);
3329  vst2_u8(out + i*2, o);
3330 #endif
3331 
3332  // "previous" value for next iter
3333  t1 = 3*in_near[i+7] + in_far[i+7];
3334  }
3335 
// scalar continuation: t1 holds the vertical blend of the sample just left
// of index i (or of sample 0 itself when the vector loop did not run, in
// which case the expression below reduces to the (t1+2)/4 edge value)
3336  t0 = t1;
3337  t1 = 3*in_near[i] + in_far[i];
3338  out[i*2] = stbi__div16(3*t1 + t0 + 8);
3339 
3340  for (++i; i < w; ++i) {
3341  t0 = t1;
3342  t1 = 3*in_near[i]+in_far[i];
3343  out[i*2-1] = stbi__div16(3*t0 + t1 + 8);
3344  out[i*2 ] = stbi__div16(3*t1 + t0 + 8);
3345  }
// right edge: vertical blend only
3346  out[w*2-1] = stbi__div4(t1+2);
3347 
3348  STBI_NOTUSED(hs);
3349 
3350  return out;
3351 }
3352 #endif
3353 
3354 static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
3355 {
3356  // resample with nearest-neighbor
3357  int i,j;
3358  STBI_NOTUSED(in_far);
3359  for (i=0; i < w; ++i)
3360  for (j=0; j < hs; ++j)
3361  out[i*hs+j] = in_near[i];
3362  return out;
3363 }
3364 
3365 // this is a reduced-precision calculation of YCbCr-to-RGB introduced
3366 // to make sure the code produces the same results in both SIMD and scalar
3367 #define stbi__float2fixed(x) (((int) ((x) * 4096.0f + 0.5f)) << 8)
3368 static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step)
3369 {
3370  int i;
3371  for (i=0; i < count; ++i) {
3372  int y_fixed = (y[i] << 20) + (1<<19); // rounding
3373  int r,g,b;
3374  int cr = pcr[i] - 128;
3375  int cb = pcb[i] - 128;
3376  r = y_fixed + cr* stbi__float2fixed(1.40200f);
3377  g = y_fixed + (cr*-stbi__float2fixed(0.71414f)) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
3378  b = y_fixed + cb* stbi__float2fixed(1.77200f);
3379  r >>= 20;
3380  g >>= 20;
3381  b >>= 20;
3382  if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
3383  if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
3384  if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
3385  out[0] = (stbi_uc)r;
3386  out[1] = (stbi_uc)g;
3387  out[2] = (stbi_uc)b;
3388  out[3] = 255;
3389  out += step;
3390  }
3391 }
3392 
3393 #if defined(STBI_SSE2) || defined(STBI_NEON)
// SIMD (SSE2 or NEON) version of the YCbCr-to-RGB row conversion.
//   out    interleaved output; advanced by `step` bytes per pixel
//   y      luma samples
//   pcb    Cb samples
//   pcr    Cr samples
//   count  number of pixels to convert
//   step   bytes per output pixel
// The vector paths run only when step == 4 and convert 8 pixels per
// iteration, writing 32 output bytes with the fourth byte of each pixel
// set to 255. Whatever is left (all pixels when step != 4) goes through
// the scalar loop at the end, which uses the same fixed-point formula as
// the scalar row converter.
3394 static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step)
3395 {
3396  int i = 0;
3397 
3398 #ifdef STBI_SSE2
3399  // step == 3 is pretty ugly on the final interleave, and i'm not convinced
3400  // it's useful in practice (you wouldn't use it for textures, for example).
3401  // so just accelerate step == 4 case.
3402  if (step == 4) {
3403  // this is a fairly straightforward implementation and not super-optimized.
3404  __m128i signflip = _mm_set1_epi8(-0x80);
3405  __m128i cr_const0 = _mm_set1_epi16( (short) ( 1.40200f*4096.0f+0.5f));
3406  __m128i cr_const1 = _mm_set1_epi16( - (short) ( 0.71414f*4096.0f+0.5f));
3407  __m128i cb_const0 = _mm_set1_epi16( - (short) ( 0.34414f*4096.0f+0.5f));
3408  __m128i cb_const1 = _mm_set1_epi16( (short) ( 1.77200f*4096.0f+0.5f));
3409  __m128i y_bias = _mm_set1_epi8((char) (unsigned char) 128);
3410  __m128i xw = _mm_set1_epi16(255); // alpha channel
3411 
3412  for (; i+7 < count; i += 8) {
3413  // load
3414  __m128i y_bytes = _mm_loadl_epi64((__m128i *) (y+i));
3415  __m128i cr_bytes = _mm_loadl_epi64((__m128i *) (pcr+i));
3416  __m128i cb_bytes = _mm_loadl_epi64((__m128i *) (pcb+i));
3417  __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128
3418  __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128
3419 
3420  // unpack to short (and left-shift cr, cb by 8)
3421  __m128i yw = _mm_unpacklo_epi8(y_bias, y_bytes);
3422  __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased);
3423  __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased);
3424 
3425  // color transform
3426  __m128i yws = _mm_srli_epi16(yw, 4);
3427  __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw);
3428  __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw);
3429  __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1);
3430  __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1);
3431  __m128i rws = _mm_add_epi16(cr0, yws);
3432  __m128i gwt = _mm_add_epi16(cb0, yws);
3433  __m128i bws = _mm_add_epi16(yws, cb1);
3434  __m128i gws = _mm_add_epi16(gwt, cr1);
3435 
3436  // descale
3437  __m128i rw = _mm_srai_epi16(rws, 4);
3438  __m128i bw = _mm_srai_epi16(bws, 4);
3439  __m128i gw = _mm_srai_epi16(gws, 4);
3440 
3441  // back to byte, set up for transpose
3442  __m128i brb = _mm_packus_epi16(rw, bw);
3443  __m128i gxb = _mm_packus_epi16(gw, xw);
3444 
3445  // transpose to interleave channels
3446  __m128i t0 = _mm_unpacklo_epi8(brb, gxb);
3447  __m128i t1 = _mm_unpackhi_epi8(brb, gxb);
3448  __m128i o0 = _mm_unpacklo_epi16(t0, t1);
3449  __m128i o1 = _mm_unpackhi_epi16(t0, t1);
3450 
3451  // store
3452  _mm_storeu_si128((__m128i *) (out + 0), o0);
3453  _mm_storeu_si128((__m128i *) (out + 16), o1);
3454  out += 32;
3455  }
3456  }
3457 #endif
3458 
3459 #ifdef STBI_NEON
3460  // in this version, step=3 support would be easy to add. but is there demand?
3461  if (step == 4) {
3462  // this is a fairly straightforward implementation and not super-optimized.
3463  uint8x8_t signflip = vdup_n_u8(0x80);
3464  int16x8_t cr_const0 = vdupq_n_s16( (short) ( 1.40200f*4096.0f+0.5f));
3465  int16x8_t cr_const1 = vdupq_n_s16( - (short) ( 0.71414f*4096.0f+0.5f));
3466  int16x8_t cb_const0 = vdupq_n_s16( - (short) ( 0.34414f*4096.0f+0.5f));
3467  int16x8_t cb_const1 = vdupq_n_s16( (short) ( 1.77200f*4096.0f+0.5f));
3468 
3469  for (; i+7 < count; i += 8) {
3470  // load
3471  uint8x8_t y_bytes = vld1_u8(y + i);
3472  uint8x8_t cr_bytes = vld1_u8(pcr + i);
3473  uint8x8_t cb_bytes = vld1_u8(pcb + i);
3474  int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip));
3475  int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip));
3476 
3477  // expand to s16
3478  int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4));
3479  int16x8_t crw = vshll_n_s8(cr_biased, 7);
3480  int16x8_t cbw = vshll_n_s8(cb_biased, 7);
3481 
3482  // color transform
3483  int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0);
3484  int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0);
3485  int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1);
3486  int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1);
3487  int16x8_t rws = vaddq_s16(yws, cr0);
3488  int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1);
3489  int16x8_t bws = vaddq_s16(yws, cb1);
3490 
3491  // undo scaling, round, convert to byte
3492  uint8x8x4_t o;
3493  o.val[0] = vqrshrun_n_s16(rws, 4);
3494  o.val[1] = vqrshrun_n_s16(gws, 4);
3495  o.val[2] = vqrshrun_n_s16(bws, 4);
3496  o.val[3] = vdup_n_u8(255);
3497 
3498  // store, interleaving r/g/b/a
3499  vst4_u8(out, o);
3500  out += 8*4;
3501  }
3502  }
3503 #endif
3504 
// scalar tail: picks up at pixel i, wherever the vector loop stopped
3505  for (; i < count; ++i) {
3506  int y_fixed = (y[i] << 20) + (1<<19); // rounding
3507  int r,g,b;
3508  int cr = pcr[i] - 128;
3509  int cb = pcb[i] - 128;
3510  r = y_fixed + cr* stbi__float2fixed(1.40200f);
3511  g = y_fixed + cr*-stbi__float2fixed(0.71414f) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
3512  b = y_fixed + cb* stbi__float2fixed(1.77200f);
3513  r >>= 20;
3514  g >>= 20;
3515  b >>= 20;
// clamp each channel to 0..255
3516  if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
3517  if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
3518  if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
3519  out[0] = (stbi_uc)r;
3520  out[1] = (stbi_uc)g;
3521  out[2] = (stbi_uc)b;
3522  out[3] = 255;
3523  out += step;
3524  }
3525 }
3526 #endif
3527 
3528 // set up the kernels
3529 static void stbi__setup_jpeg(stbi__jpeg *j)
3530 {
3531  j->idct_block_kernel = stbi__idct_block;
3532  j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row;
3533  j->resample_row_hv_2_kernel = stbi__resample_row_hv_2;
3534 
3535 #ifdef STBI_SSE2
3536  if (stbi__sse2_available()) {
3537  j->idct_block_kernel = stbi__idct_simd;
3538  j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
3539  j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
3540  }
3541 #endif
3542 
3543 #ifdef STBI_NEON
3544  j->idct_block_kernel = stbi__idct_simd;
3545  j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
3546  j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
3547 #endif
3548 }
3549 
3550 // clean up the temporary component buffers
3551 static void stbi__cleanup_jpeg(stbi__jpeg *j)
3552 {
3553  stbi__free_jpeg_components(j, j->s->img_n, 0);
3554 }
3555 
3556 typedef struct
3557 {
3558  resample_row_func resample;
3559  stbi_uc *line0,*line1;
3560  int hs,vs; // expansion factor in each axis
3561  int w_lores; // horizontal pixels pre-expansion
3562  int ystep; // how far through vertical expansion we are
3563  int ypos; // which pre-expansion row we're on
3564 } stbi__resample;
3565 
3566 // fast 0..255 * 0..255 => 0..255 rounded multiplication
3567 static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y)
3568 {
3569  unsigned int t = x*y + 128;
3570  return (stbi_uc) ((t + (t >>8)) >> 8);
3571 }
3572 
// Decode the JPEG attached to 'z' and return a newly allocated, interleaved
// 8-bit image with n channels per pixel, where n is req_comp, or (when
// req_comp is 0) 3 for images with >= 3 components and 1 otherwise.
//   out_x, out_y : receive the image width and height (written unconditionally on success)
//   comp         : if non-NULL, receives 3 or 1 depending on the source component count
//   req_comp     : 0 for "natural" channel count, or 1..4; anything else is rejected
// Returns NULL on failure (bad req_comp, decode error, or allocation failure).
// stbi__cleanup_jpeg(z) is called on every path that runs after the decode step.
3573 static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp)
3574 {
3575  int n, decode_n, is_rgb;
3576  z->s->img_n = 0; // make stbi__cleanup_jpeg safe
3577 
3578  // validate req_comp
3579  if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
3580 
3581  // load a jpeg image from whichever source, but leave in YCbCr format
3582  if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; }
3583 
3584  // determine actual number of components to generate
3585  n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1;
3586 
// 3-component data is copied through without YCbCr conversion when z->rgb == 3,
// or when app14_color_transform is 0 and z->jfif is not set
3587  is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif));
3588 
// when reducing 3-component non-RGB data to 1 or 2 channels only the first
// component is read below, so only that one is resampled
3589  if (z->s->img_n == 3 && n < 3 && !is_rgb)
3590  decode_n = 1;
3591  else
3592  decode_n = z->s->img_n;
3593 
3594  // resample and color-convert
3595  {
3596  int k;
3597  unsigned int i,j;
3598  stbi_uc *output;
3599  stbi_uc *coutput[4];
3600 
3601  stbi__resample res_comp[4];
3602 
// set up one resampler per component that will actually be read
3603  for (k=0; k < decode_n; ++k) {
3604  stbi__resample *r = &res_comp[k];
3605 
3606  // allocate line buffer big enough for upsampling off the edges
3607  // with upsample factor of 4
3608  z->img_comp[k].linebuf = (stbi_uc *) stbi__malloc(z->s->img_x + 3);
3609  if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
3610 
3611  r->hs = z->img_h_max / z->img_comp[k].h;
3612  r->vs = z->img_v_max / z->img_comp[k].v;
3613  r->ystep = r->vs >> 1;
3614  r->w_lores = (z->s->img_x + r->hs-1) / r->hs;
3615  r->ypos = 0;
3616  r->line0 = r->line1 = z->img_comp[k].data;
3617 
// pick a specialised routine for the 1x/2x cases, generic otherwise
3618  if (r->hs == 1 && r->vs == 1) r->resample = resample_row_1;
3619  else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2;
3620  else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2;
3621  else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel;
3622  else r->resample = stbi__resample_row_generic;
3623  }
3624 
3625  // can't error after this so, this is safe
3626  output = (stbi_uc *) stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1);
3627  if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
3628 
3629  // now go ahead and resample
3630  for (j=0; j < z->s->img_y; ++j) {
3631  stbi_uc *out = output + n * z->s->img_x * j;
// produce one full-width row per component, then advance that component's
// source rows once 'vs' output rows have been generated from them
3632  for (k=0; k < decode_n; ++k) {
3633  stbi__resample *r = &res_comp[k];
3634  int y_bot = r->ystep >= (r->vs >> 1);
3635  coutput[k] = r->resample(z->img_comp[k].linebuf,
3636  y_bot ? r->line1 : r->line0,
3637  y_bot ? r->line0 : r->line1,
3638  r->w_lores, r->hs);
3639  if (++r->ystep >= r->vs) {
3640  r->ystep = 0;
3641  r->line0 = r->line1;
3642  if (++r->ypos < z->img_comp[k].y)
3643  r->line1 += z->img_comp[k].w2;
3644  }
3645  }
// 3- or 4-channel output
3646  if (n >= 3) {
3647  stbi_uc *y = coutput[0];
3648  if (z->s->img_n == 3) {
3649  if (is_rgb) {
3650  for (i=0; i < z->s->img_x; ++i) {
3651  out[0] = y[i];
3652  out[1] = coutput[1][i];
3653  out[2] = coutput[2][i];
3654  out[3] = 255;
3655  out += n;
3656  }
3657  } else {
3658  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
3659  }
3660  } else if (z->s->img_n == 4) {
3661  if (z->app14_color_transform == 0) { // CMYK
3662  for (i=0; i < z->s->img_x; ++i) {
3663  stbi_uc m = coutput[3][i];
3664  out[0] = stbi__blinn_8x8(coutput[0][i], m);
3665  out[1] = stbi__blinn_8x8(coutput[1][i], m);
3666  out[2] = stbi__blinn_8x8(coutput[2][i], m);
3667  out[3] = 255;
3668  out += n;
3669  }
3670  } else if (z->app14_color_transform == 2) { // YCCK
3671  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
3672  for (i=0; i < z->s->img_x; ++i) {
3673  stbi_uc m = coutput[3][i];
3674  out[0] = stbi__blinn_8x8(255 - out[0], m);
3675  out[1] = stbi__blinn_8x8(255 - out[1], m);
3676  out[2] = stbi__blinn_8x8(255 - out[2], m);
3677  out += n;
3678  }
3679  } else { // YCbCr + alpha? Ignore the fourth channel for now
3680  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
3681  }
3682  } else
3683  for (i=0; i < z->s->img_x; ++i) {
3684  out[0] = out[1] = out[2] = y[i];
3685  out[3] = 255; // not used if n==3
3686  out += n;
3687  }
// 1- or 2-channel output
3688  } else {
3689  if (is_rgb) {
3690  if (n == 1)
3691  for (i=0; i < z->s->img_x; ++i)
3692  *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
3693  else {
3694  for (i=0; i < z->s->img_x; ++i, out += 2) {
3695  out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
3696  out[1] = 255;
3697  }
3698  }
3699  } else if (z->s->img_n == 4 && z->app14_color_transform == 0) {
3700  for (i=0; i < z->s->img_x; ++i) {
3701  stbi_uc m = coutput[3][i];
3702  stbi_uc r = stbi__blinn_8x8(coutput[0][i], m);
3703  stbi_uc g = stbi__blinn_8x8(coutput[1][i], m);
3704  stbi_uc b = stbi__blinn_8x8(coutput[2][i], m);
3705  out[0] = stbi__compute_y(r, g, b);
3706  out[1] = 255;
3707  out += n;
3708  }
3709  } else if (z->s->img_n == 4 && z->app14_color_transform == 2) {
3710  for (i=0; i < z->s->img_x; ++i) {
3711  out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]);
3712  out[1] = 255;
3713  out += n;
3714  }
3715  } else {
3716  stbi_uc *y = coutput[0];
3717  if (n == 1)
3718  for (i=0; i < z->s->img_x; ++i) out[i] = y[i];
3719  else
3720  for (i=0; i < z->s->img_x; ++i) *out++ = y[i], *out++ = 255;
3721  }
3722  }
3723  }
3724  stbi__cleanup_jpeg(z);
3725  *out_x = z->s->img_x;
3726  *out_y = z->s->img_y;
3727  if (comp) *comp = z->s->img_n >= 3 ? 3 : 1; // report original components, not output
3728  return output;
3729  }
3730 }
3731 
3732 static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
3733 {
3734  unsigned char* result;
3735  stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg));
3736  STBI_NOTUSED(ri);
3737  j->s = s;
3738  stbi__setup_jpeg(j);
3739  result = load_jpeg_image(j, x,y,comp,req_comp);
3740  STBI_FREE(j);
3741  return result;
3742 }
3743 
3744 static int stbi__jpeg_test(stbi__context *s)
3745 {
3746  int r;
3747  stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg));
3748  j->s = s;
3749  stbi__setup_jpeg(j);
3750  r = stbi__decode_jpeg_header(j, STBI__SCAN_type);
3751  stbi__rewind(s);
3752  STBI_FREE(j);
3753  return r;
3754 }
3755 
3756 static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp)
3757 {
3758  if (!stbi__decode_jpeg_header(j, STBI__SCAN_header)) {
3759  stbi__rewind( j->s );
3760  return 0;
3761  }
3762  if (x) *x = j->s->img_x;
3763  if (y) *y = j->s->img_y;
3764  if (comp) *comp = j->s->img_n >= 3 ? 3 : 1;
3765  return 1;
3766 }
3767 
3768 static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
3769 {
3770  int result;
3771  stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg)));
3772  j->s = s;
3773  result = stbi__jpeg_info_raw(j, x, y, comp);
3774  STBI_FREE(j);
3775  return result;
3776 }
3777 #endif
3778 
3779 // public domain zlib decode v0.2 Sean Barrett 2006-11-18
3780 // simple implementation
3781 // - all input must be provided in an upfront buffer
3782 // - all output is written to a single output buffer (can malloc/realloc)
3783 // performance
3784 // - fast huffman
3785 
3786 #ifndef STBI_NO_ZLIB
3787 
3788 // fast-way is faster to check than jpeg huffman, but slow way is slower
// STBI__ZFAST_BITS is the number of low code-buffer bits used to index the
// 'fast' lookup table; STBI__ZFAST_MASK selects exactly those bits.
3789 #define STBI__ZFAST_BITS 9 // accelerate all cases in default tables
3790 #define STBI__ZFAST_MASK ((1 << STBI__ZFAST_BITS) - 1)
3791 
3792 // zlib-style huffman encoding
3793 // (jpegs packs from left, zlib from right, so can't share code)
3794 typedef struct
3795 {
3796  stbi__uint16 fast[1 << STBI__ZFAST_BITS];
3797  stbi__uint16 firstcode[16];
3798  int maxcode[17];
3799  stbi__uint16 firstsymbol[16];
3800  stbi_uc size[288];
3801  stbi__uint16 value[288];
3802 } stbi__zhuffman;
3803 
3804 stbi_inline static int stbi__bitreverse16(int n)
3805 {
3806  n = ((n & 0xAAAA) >> 1) | ((n & 0x5555) << 1);
3807  n = ((n & 0xCCCC) >> 2) | ((n & 0x3333) << 2);
3808  n = ((n & 0xF0F0) >> 4) | ((n & 0x0F0F) << 4);
3809  n = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8);
3810  return n;
3811 }
3812 
3813 stbi_inline static int stbi__bit_reverse(int v, int bits)
3814 {
3815  STBI_ASSERT(bits <= 16);
3816  // to bit reverse n bits, reverse 16 and shift
3817  // e.g. 11 bits, bit reverse and shift away 5
3818  return stbi__bitreverse16(v) >> (16-bits);
3819 }
3820 
// Build the decoding tables in 'z' from 'num' code lengths in 'sizelist'
// (sizelist[i] is the bit length of symbol i, 0 meaning "symbol unused").
// Fills firstcode/firstsymbol/maxcode per bit length, size[]/value[] per
// symbol slot, and the 'fast' table for codes of at most STBI__ZFAST_BITS bits.
// Returns 1 on success, or 0 (via stbi__err) when the lengths cannot form a valid code.
3821 static int stbi__zbuild_huffman(stbi__zhuffman *z, const stbi_uc *sizelist, int num)
3822 {
3823  int i,k=0;
3824  int code, next_code[16], sizes[17];
3825 
3826  // DEFLATE spec for generating codes
3827  memset(sizes, 0, sizeof(sizes));
3828  memset(z->fast, 0, sizeof(z->fast));
// count how many symbols use each code length
3829  for (i=0; i < num; ++i)
3830  ++sizes[sizelist[i]];
3831  sizes[0] = 0;
// a length of i bits can encode at most 2^i symbols
3832  for (i=1; i < 16; ++i)
3833  if (sizes[i] > (1 << i))
3834  return stbi__err("bad sizes", "Corrupt PNG");
3835  code = 0;
// assign the first code and first symbol slot for every length
3836  for (i=1; i < 16; ++i) {
3837  next_code[i] = code;
3838  z->firstcode[i] = (stbi__uint16) code;
3839  z->firstsymbol[i] = (stbi__uint16) k;
3840  code = (code + sizes[i]);
3841  if (sizes[i])
3842  if (code-1 >= (1 << i)) return stbi__err("bad codelengths","Corrupt PNG");
3843  z->maxcode[i] = code << (16-i); // preshift for inner loop
3844  code <<= 1;
3845  k += sizes[i];
3846  }
3847  z->maxcode[16] = 0x10000; // sentinel
// place every used symbol into its slot and, for short codes, into the fast table
3848  for (i=0; i < num; ++i) {
3849  int s = sizelist[i];
3850  if (s) {
3851  int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s];
3852  stbi__uint16 fastv = (stbi__uint16) ((s << 9) | i);
3853  z->size [c] = (stbi_uc ) s;
3854  z->value[c] = (stbi__uint16) i;
3855  if (s <= STBI__ZFAST_BITS) {
// every fast-table index whose low s bits equal the bit-reversed code maps to this symbol
3856  int j = stbi__bit_reverse(next_code[s],s);
3857  while (j < (1 << STBI__ZFAST_BITS)) {
3858  z->fast[j] = fastv;
3859  j += (1 << s);
3860  }
3861  }
3862  ++next_code[s];
3863  }
3864  }
3865  return 1;
3866 }
3867 
3868 // zlib-from-memory implementation for PNG reading
3869 // because PNG allows splitting the zlib stream arbitrarily,
3870 // and it's annoying structurally to have PNG call ZLIB call PNG,
3871 // we require PNG read all the IDATs and combine them into a single
3872 // memory buffer
3873 
3874 typedef struct
3875 {
3876  stbi_uc *zbuffer, *zbuffer_end;
3877  int num_bits;
3878  stbi__uint32 code_buffer;
3879 
3880  char *zout;
3881  char *zout_start;
3882  char *zout_end;
3883  int z_expandable;
3884 
3885  stbi__zhuffman z_length, z_distance;
3886 } stbi__zbuf;
3887 
3888 stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z)
3889 {
3890  if (z->zbuffer >= z->zbuffer_end) return 0;
3891  return *z->zbuffer++;
3892 }
3893 
3894 static void stbi__fill_bits(stbi__zbuf *z)
3895 {
3896  do {
3897  STBI_ASSERT(z->code_buffer < (1U << z->num_bits));
3898  z->code_buffer |= (unsigned int) stbi__zget8(z) << z->num_bits;
3899  z->num_bits += 8;
3900  } while (z->num_bits <= 24);
3901 }
3902 
3903 stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n)
3904 {
3905  unsigned int k;
3906  if (z->num_bits < n) stbi__fill_bits(z);
3907  k = z->code_buffer & ((1 << n) - 1);
3908  z->code_buffer >>= n;
3909  z->num_bits -= n;
3910  return k;
3911 }
3912 
3913 static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z)
3914 {
3915  int b,s,k;
3916  // not resolved by fast table, so compute it the slow way
3917  // use jpeg approach, which requires MSbits at top
3918  k = stbi__bit_reverse(a->code_buffer, 16);
3919  for (s=STBI__ZFAST_BITS+1; ; ++s)
3920  if (k < z->maxcode[s])
3921  break;
3922  if (s == 16) return -1; // invalid code!
3923  // code size is s, so:
3924  b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s];
3925  STBI_ASSERT(z->size[b] == s);
3926  a->code_buffer >>= s;
3927  a->num_bits -= s;
3928  return z->value[b];
3929 }
3930 
3931 stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z)
3932 {
3933  int b,s;
3934  if (a->num_bits < 16) stbi__fill_bits(a);
3935  b = z->fast[a->code_buffer & STBI__ZFAST_MASK];
3936  if (b) {
3937  s = b >> 9;
3938  a->code_buffer >>= s;
3939  a->num_bits -= s;
3940  return b & 511;
3941  }
3942  return stbi__zhuffman_decode_slowpath(a, z);
3943 }
3944 
3945 static int stbi__zexpand(stbi__zbuf *z, char *zout, int n) // need to make room for n bytes
3946 {
3947  char *q;
3948  int cur, limit, old_limit;
3949  z->zout = zout;
3950  if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG");
3951  cur = (int) (z->zout - z->zout_start);
3952  limit = old_limit = (int) (z->zout_end - z->zout_start);
3953  while (cur + n > limit)
3954  limit *= 2;
3955  q = (char *) STBI_REALLOC_SIZED(z->zout_start, old_limit, limit);
3956  STBI_NOTUSED(old_limit);
3957  if (q == NULL) return stbi__err("outofmem", "Out of memory");
3958  z->zout_start = q;
3959  z->zout = q + cur;
3960  z->zout_end = q + limit;
3961  return 1;
3962 }
3963 
// base match length for length symbol (257 + index); trailing entries are 0
static const int stbi__zlength_base[31] =
{
     3,   4,   5,   6,   7,   8,   9,  10,
    11,  13,  15,  17,  19,  23,  27,  31,
    35,  43,  51,  59,  67,  83,  99, 115,
   131, 163, 195, 227, 258,   0,   0
};

// number of extra bits added to the base length, per length symbol
static const int stbi__zlength_extra[31] =
{
   0, 0, 0, 0, 0, 0, 0, 0,
   1, 1, 1, 1, 2, 2, 2, 2,
   3, 3, 3, 3, 4, 4, 4, 4,
   5, 5, 5, 5, 0, 0, 0
};

// base match distance per distance symbol; trailing entries are 0
static const int stbi__zdist_base[32] =
{
      1,    2,    3,    4,    5,    7,     9,    13,
     17,   25,   33,   49,   65,   97,   129,   193,
    257,  385,  513,  769, 1025, 1537,  2049,  3073,
   4097, 6145, 8193, 12289, 16385, 24577,     0,     0
};

// number of extra bits added to the base distance, per distance symbol
static const int stbi__zdist_extra[32] =
{
    0,  0,  0,  0,  1,  1,  2,  2,
    3,  3,  4,  4,  5,  5,  6,  6,
    7,  7,  8,  8,  9,  9, 10, 10,
   11, 11, 12, 12, 13, 13,  0,  0
};
3977 
3978 static int stbi__parse_huffman_block(stbi__zbuf *a)
3979 {
3980  char *zout = a->zout;
3981  for(;;) {
3982  int z = stbi__zhuffman_decode(a, &a->z_length);
3983  if (z < 256) {
3984  if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes
3985  if (zout >= a->zout_end) {
3986  if (!stbi__zexpand(a, zout, 1)) return 0;
3987  zout = a->zout;
3988  }
3989  *zout++ = (char) z;
3990  } else {
3991  stbi_uc *p;
3992  int len,dist;
3993  if (z == 256) {
3994  a->zout = zout;
3995  return 1;
3996  }
3997  z -= 257;
3998  len = stbi__zlength_base[z];
3999  if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]);
4000  z = stbi__zhuffman_decode(a, &a->z_distance);
4001  if (z < 0) return stbi__err("bad huffman code","Corrupt PNG");
4002  dist = stbi__zdist_base[z];
4003  if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]);
4004  if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG");
4005  if (zout + len > a->zout_end) {
4006  if (!stbi__zexpand(a, zout, len)) return 0;
4007  zout = a->zout;
4008  }
4009  p = (stbi_uc *) (zout - dist);
4010  if (dist == 1) { // run of one byte; common in images.
4011  stbi_uc v = *p;
4012  if (len) { do *zout++ = v; while (--len); }
4013  } else {
4014  if (len) { do *zout++ = *p++; while (--len); }
4015  }
4016  }
4017  }
4018 }
4019 
// Read the code-length description at the start of a type-2 block from 'a' and
// build a->z_length and a->z_distance from it.
// Returns 1 on success, 0 (via stbi__err or a failed table build) on corrupt data.
4020 static int stbi__compute_huffman_codes(stbi__zbuf *a)
4021 {
// order in which the 3-bit code-length-code sizes appear in the stream
4022  static const stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 };
4023  stbi__zhuffman z_codelength;
4024  stbi_uc lencodes[286+32+137];//padding for maximum single op
4025  stbi_uc codelength_sizes[19];
4026  int i,n;
4027 
// counts of literal/length codes, distance codes and code-length codes
4028  int hlit = stbi__zreceive(a,5) + 257;
4029  int hdist = stbi__zreceive(a,5) + 1;
4030  int hclen = stbi__zreceive(a,4) + 4;
4031  int ntot = hlit + hdist;
4032 
4033  memset(codelength_sizes, 0, sizeof(codelength_sizes));
4034  for (i=0; i < hclen; ++i) {
4035  int s = stbi__zreceive(a,3);
4036  codelength_sizes[length_dezigzag[i]] = (stbi_uc) s;
4037  }
4038  if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0;
4039 
// decode ntot code lengths; symbols 0..15 are literal lengths, 16..18 are runs
4040  n = 0;
4041  while (n < ntot) {
4042  int c = stbi__zhuffman_decode(a, &z_codelength);
4043  if (c < 0 || c >= 19) return stbi__err("bad codelengths", "Corrupt PNG");
4044  if (c < 16)
4045  lencodes[n++] = (stbi_uc) c;
4046  else {
4047  stbi_uc fill = 0;
// 16: repeat the previous length 3..6 times (needs a previous entry)
4048  if (c == 16) {
4049  c = stbi__zreceive(a,2)+3;
4050  if (n == 0) return stbi__err("bad codelengths", "Corrupt PNG");
4051  fill = lencodes[n-1];
// 17: 3..10 zero lengths
4052  } else if (c == 17)
4053  c = stbi__zreceive(a,3)+3;
// 18: 11..138 zero lengths
4054  else {
4055  STBI_ASSERT(c == 18);
4056  c = stbi__zreceive(a,7)+11;
4057  }
// a run must not extend past the announced number of lengths
4058  if (ntot - n < c) return stbi__err("bad codelengths", "Corrupt PNG");
4059  memset(lencodes+n, fill, c);
4060  n += c;
4061  }
4062  }
4063  if (n != ntot) return stbi__err("bad codelengths","Corrupt PNG");
// the first hlit lengths describe z_length, the following hdist describe z_distance
4064  if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0;
4065  if (!stbi__zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0;
4066  return 1;
4067 }
4068 
4069 static int stbi__parse_uncompressed_block(stbi__zbuf *a)
4070 {
4071  stbi_uc header[4];
4072  int len,nlen,k;
4073  if (a->num_bits & 7)
4074  stbi__zreceive(a, a->num_bits & 7); // discard
4075  // drain the bit-packed data into header
4076  k = 0;
4077  while (a->num_bits > 0) {
4078  header[k++] = (stbi_uc) (a->code_buffer & 255); // suppress MSVC run-time check
4079  a->code_buffer >>= 8;
4080  a->num_bits -= 8;
4081  }
4082  STBI_ASSERT(a->num_bits == 0);
4083  // now fill header the normal way
4084  while (k < 4)
4085  header[k++] = stbi__zget8(a);
4086  len = header[1] * 256 + header[0];
4087  nlen = header[3] * 256 + header[2];
4088  if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt","Corrupt PNG");
4089  if (a->zbuffer + len > a->zbuffer_end) return stbi__err("read past buffer","Corrupt PNG");
4090  if (a->zout + len > a->zout_end)
4091  if (!stbi__zexpand(a, a->zout, len)) return 0;
4092  memcpy(a->zout, a->zbuffer, len);
4093  a->zbuffer += len;
4094  a->zout += len;
4095  return 1;
4096 }
4097 
4098 static int stbi__parse_zlib_header(stbi__zbuf *a)
4099 {
4100  int cmf = stbi__zget8(a);
4101  int cm = cmf & 15;
4102  /* int cinfo = cmf >> 4; */
4103  int flg = stbi__zget8(a);
4104  if ((cmf*256+flg) % 31 != 0) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec
4105  if (flg & 32) return stbi__err("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png
4106  if (cm != 8) return stbi__err("bad compression","Corrupt PNG"); // DEFLATE required for png
4107  // window = 1 << (8 + cinfo)... but who cares, we fully buffer output
4108  return 1;
4109 }
4110 
// code lengths passed to stbi__zbuild_huffman for a->z_length in "type 1" blocks:
// 8 bits for symbols 0..143, 9 for 144..255, 7 for 256..279, 8 for 280..287
4111 static const stbi_uc stbi__zdefault_length[288] =
4112 {
4113  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
4114  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
4115  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
4116  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
4117  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
4118  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
4119  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
4120  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
4121  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8
4122 };
// code lengths passed to stbi__zbuild_huffman for a->z_distance in "type 1" blocks: all 32 symbols use 5 bits
4123 static const stbi_uc stbi__zdefault_distance[32] =
4124 {
4125  5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5
4126 };
4127 /*
4128 Init algorithm:
4129 {
4130  int i; // use <= to match clearly with spec
4131  for (i=0; i <= 143; ++i) stbi__zdefault_length[i] = 8;
4132  for ( ; i <= 255; ++i) stbi__zdefault_length[i] = 9;
4133  for ( ; i <= 279; ++i) stbi__zdefault_length[i] = 7;
4134  for ( ; i <= 287; ++i) stbi__zdefault_length[i] = 8;
4135 
4136  for (i=0; i <= 31; ++i) stbi__zdefault_distance[i] = 5;
4137 }
4138 */
4139 
4140 static int stbi__parse_zlib(stbi__zbuf *a, int parse_header)
4141 {
4142  int final, type;
4143  if (parse_header)
4144  if (!stbi__parse_zlib_header(a)) return 0;
4145  a->num_bits = 0;
4146  a->code_buffer = 0;
4147  do {
4148  final = stbi__zreceive(a,1);
4149  type = stbi__zreceive(a,2);
4150  if (type == 0) {
4151  if (!stbi__parse_uncompressed_block(a)) return 0;
4152  } else if (type == 3) {
4153  return 0;
4154  } else {
4155  if (type == 1) {
4156  // use fixed code lengths
4157  if (!stbi__zbuild_huffman(&a->z_length , stbi__zdefault_length , 288)) return 0;
4158  if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance, 32)) return 0;
4159  } else {
4160  if (!stbi__compute_huffman_codes(a)) return 0;
4161  }
4162  if (!stbi__parse_huffman_block(a)) return 0;
4163  }
4164  } while (!final);
4165  return 1;
4166 }
4167 
4168 static int stbi__do_zlib(stbi__zbuf *a, char *obuf, int olen, int exp, int parse_header)
4169 {
4170  a->zout_start = obuf;
4171  a->zout = obuf;
4172  a->zout_end = obuf + olen;
4173  a->z_expandable = exp;
4174 
4175  return stbi__parse_zlib(a, parse_header);
4176 }
4177 
4178 STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen)
4179 {
4180  stbi__zbuf a;
4181  char *p = (char *) stbi__malloc(initial_size);
4182  if (p == NULL) return NULL;
4183  a.zbuffer = (stbi_uc *) buffer;
4184  a.zbuffer_end = (stbi_uc *) buffer + len;
4185  if (stbi__do_zlib(&a, p, initial_size, 1, 1)) {
4186  if (outlen) *outlen = (int) (a.zout - a.zout_start);
4187  return a.zout_start;
4188  } else {
4189  STBI_FREE(a.zout_start);
4190  return NULL;
4191  }
4192 }
4193 
4194 STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen)
4195 {
4196  return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen);
4197 }
4198 
4199 STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header)
4200 {
4201  stbi__zbuf a;
4202  char *p = (char *) stbi__malloc(initial_size);
4203  if (p == NULL) return NULL;
4204  a.zbuffer = (stbi_uc *) buffer;
4205  a.zbuffer_end = (stbi_uc *) buffer + len;
4206  if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) {
4207  if (outlen) *outlen = (int) (a.zout - a.zout_start);
4208  return a.zout_start;
4209  } else {
4210  STBI_FREE(a.zout_start);
4211  return NULL;
4212  }
4213 }
4214 
4215 STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen)
4216 {
4217  stbi__zbuf a;
4218  a.zbuffer = (stbi_uc *) ibuffer;
4219  a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
4220  if (stbi__do_zlib(&a, obuffer, olen, 0, 1))
4221  return (int) (a.zout - a.zout_start);
4222  else
4223  return -1;
4224 }
4225 
4226 STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen)
4227 {
4228  stbi__zbuf a;
4229  char *p = (char *) stbi__malloc(16384);
4230  if (p == NULL) return NULL;
4231  a.zbuffer = (stbi_uc *) buffer;
4232  a.zbuffer_end = (stbi_uc *) buffer+len;
4233  if (stbi__do_zlib(&a, p, 16384, 1, 0)) {
4234  if (outlen) *outlen = (int) (a.zout - a.zout_start);
4235  return a.zout_start;
4236  } else {
4237  STBI_FREE(a.zout_start);
4238  return NULL;
4239  }
4240 }
4241 
4242 STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
4243 {
4244  stbi__zbuf a;
4245  a.zbuffer = (stbi_uc *) ibuffer;
4246  a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
4247  if (stbi__do_zlib(&a, obuffer, olen, 0, 0))
4248  return (int) (a.zout - a.zout_start);
4249  else
4250  return -1;
4251 }
4252 #endif
4253 
4254 // public domain "baseline" PNG decoder v0.10 Sean Barrett 2006-11-18
4255 // simple implementation
4256 // - only 8-bit samples
4257 // - no CRC checking
4258 // - allocates lots of intermediate memory
4259 // - avoids problem of streaming data between subsystems
4260 // - avoids explicit window management
4261 // performance
4262 // - uses stb_zlib, a PD zlib implementation with fast huffman decoding
4263 
4264 #ifndef STBI_NO_PNG
4265 typedef struct
4266 {
4267  stbi__uint32 length;
4268  stbi__uint32 type;
4269 } stbi__pngchunk;
4270 
4271 static stbi__pngchunk stbi__get_chunk_header(stbi__context *s)
4272 {
4273  stbi__pngchunk c;
4274  c.length = stbi__get32be(s);
4275  c.type = stbi__get32be(s);
4276  return c;
4277 }
4278 
4279 static int stbi__check_png_header(stbi__context *s)
4280 {
4281  static const stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 };
4282  int i;
4283  for (i=0; i < 8; ++i)
4284  if (stbi__get8(s) != png_sig[i]) return stbi__err("bad png sig","Not a PNG");
4285  return 1;
4286 }
4287 
4288 typedef struct
4289 {
4290  stbi__context *s;
4291  stbi_uc *idata, *expanded, *out;
4292  int depth;
4293  float gamma = 0;
4294 } stbi__png;
4295 
4296 
// PNG row filter ids (0..4 come straight from the file)
enum {
   STBI__F_none  = 0,
   STBI__F_sub   = 1,
   STBI__F_up    = 2,
   STBI__F_avg   = 3,
   STBI__F_paeth = 4,
   // synthetic filters used for first scanline to avoid needing a dummy row of 0s
   STBI__F_avg_first   = 5,
   STBI__F_paeth_first = 6
};
4307 
4308 static stbi_uc first_row_filter[5] =
4309 {
4310  STBI__F_none,
4311  STBI__F_sub,
4312  STBI__F_none,
4313  STBI__F_avg_first,
4314  STBI__F_paeth_first
4315 };
4316 
// Paeth predictor: of a, b and c, return the one closest to a + b - c,
// preferring a, then b, then c when distances tie
static int stbi__paeth(int a, int b, int c)
{
   int estimate = a + b - c;
   int dist_a = abs(estimate - a);
   int dist_b = abs(estimate - b);
   int dist_c = abs(estimate - c);

   if (dist_a > dist_b || dist_a > dist_c) {
      // a is not the closest; pick between b and c
      return (dist_b <= dist_c) ? b : c;
   }
   return a;
}
4327 
4328 static const stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 };
4329 
4330 // create the png data from post-deflated data
4331 static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color)
4332 {
4333  int bytes = (depth == 16? 2 : 1);
4334  stbi__context *s = a->s;
4335  stbi__uint32 i,j,stride = x*out_n*bytes;
4336  stbi__uint32 img_len, img_width_bytes;
4337  int k;
4338  int img_n = s->img_n; // copy it into a local for later
4339 
4340  int output_bytes = out_n*bytes;
4341  int filter_bytes = img_n*bytes;
4342  int width = x;
4343 
4344  STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1);
4345  a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into
4346  if (!a->out) return stbi__err("outofmem", "Out of memory");
4347 
4348  if (!stbi__mad3sizes_valid(img_n, x, depth, 7)) return stbi__err("too large", "Corrupt PNG");
4349  img_width_bytes = (((img_n * x * depth) + 7) >> 3);
4350  img_len = (img_width_bytes + 1) * y;
4351 
4352  // we used to check for exact match between raw_len and img_len on non-interlaced PNGs,
4353  // but issue #276 reported a PNG in the wild that had extra data at the end (all zeros),
4354  // so just check for raw_len < img_len always.
4355  if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG");
4356 
4357  for (j=0; j < y; ++j) {
4358  stbi_uc *cur = a->out + stride*j;
4359  stbi_uc *prior;
4360  int filter = *raw++;
4361 
4362  if (filter > 4)
4363  return stbi__err("invalid filter","Corrupt PNG");
4364 
4365  if (depth < 8) {
4366  STBI_ASSERT(img_width_bytes <= x);
4367  cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place
4368  filter_bytes = 1;
4369  width = img_width_bytes;
4370  }
4371  prior = cur - stride; // bugfix: need to compute this after 'cur +=' computation above
4372 
4373  // if first row, use special filter that doesn't sample previous row
4374  if (j == 0) filter = first_row_filter[filter];
4375 
4376  // handle first byte explicitly
4377  for (k=0; k < filter_bytes; ++k) {
4378  switch (filter) {
4379  case STBI__F_none : cur[k] = raw[k]; break;
4380  case STBI__F_sub : cur[k] = raw[k]; break;
4381  case STBI__F_up : cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
4382  case STBI__F_avg : cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); break;
4383  case STBI__F_paeth : cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0,prior[k],0)); break;
4384  case STBI__F_avg_first : cur[k] = raw[k]; break;
4385  case STBI__F_paeth_first: cur[k] = raw[k]; break;
4386  }
4387  }
4388 
4389  if (depth == 8) {
4390  if (img_n != out_n)
4391  cur[img_n] = 255; // first pixel
4392  raw += img_n;
4393  cur += out_n;
4394  prior += out_n;
4395  } else if (depth == 16) {
4396  if (img_n != out_n) {
4397  cur[filter_bytes] = 255; // first pixel top byte
4398  cur[filter_bytes+1] = 255; // first pixel bottom byte
4399  }
4400  raw += filter_bytes;
4401  cur += output_bytes;
4402  prior += output_bytes;
4403  } else {
4404  raw += 1;
4405  cur += 1;
4406  prior += 1;
4407  }
4408 
4409  // this is a little gross, so that we don't switch per-pixel or per-component
4410  if (depth < 8 || img_n == out_n) {
4411  int nk = (width - 1)*filter_bytes;
4412  #define STBI__CASE(f) \
4413  case f: \
4414  for (k=0; k < nk; ++k)
4415  switch (filter) {
4416  // "none" filter turns into a memcpy here; make that explicit.
4417  case STBI__F_none: memcpy(cur, raw, nk); break;
4418  STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); } break;
4419  STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
4420  STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); } break;
4421  STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); } break;
4422  STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); } break;
4423  STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); } break;
4424  }
4425  #undef STBI__CASE
4426  raw += nk;
4427  } else {
4428  STBI_ASSERT(img_n+1 == out_n);
4429  #define STBI__CASE(f) \
4430  case f: \
4431  for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \
4432  for (k=0; k < filter_bytes; ++k)
4433  switch (filter) {
4434  STBI__CASE(STBI__F_none) { cur[k] = raw[k]; } break;
4435  STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); } break;
4436  STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
4437  STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); } break;
4438  STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); } break;
4439  STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); } break;
4440  STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); } break;
4441  }
4442  #undef STBI__CASE
4443 
4444  // the loop above sets the high byte of the pixels' alpha, but for
4445  // 16 bit png files we also need the low byte set. we'll do that here.
4446  if (depth == 16) {
4447  cur = a->out + stride*j; // start at the beginning of the row again
4448  for (i=0; i < x; ++i,cur+=output_bytes) {
4449  cur[filter_bytes+1] = 255;
4450  }
4451  }
4452  }
4453  }
4454 
4455  // we make a separate pass to expand bits to pixels; for performance,
4456  // this could run two scanlines behind the above code, so it won't
4457  // intefere with filtering but will still be in the cache.
4458  if (depth < 8) {
4459  for (j=0; j < y; ++j) {
4460  stbi_uc *cur = a->out + stride*j;
4461  stbi_uc *in = a->out + stride*j + x*out_n - img_width_bytes;
4462  // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit
4463  // png guarante byte alignment, if width is not multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop
4464  stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range
4465 
4466  // note that the final byte might overshoot and write more data than desired.
4467  // we can allocate enough data that this never writes out of memory, but it
4468  // could also overwrite the next scanline. can it overwrite non-empty data
4469  // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel.
4470  // so we need to explicitly clamp the final ones
4471 
4472  if (depth == 4) {
4473  for (k=x*img_n; k >= 2; k-=2, ++in) {
4474  *cur++ = scale * ((*in >> 4) );
4475  *cur++ = scale * ((*in ) & 0x0f);
4476  }
4477  if (k > 0) *cur++ = scale * ((*in >> 4) );
4478  } else if (depth == 2) {
4479  for (k=x*img_n; k >= 4; k-=4, ++in) {
4480  *cur++ = scale * ((*in >> 6) );
4481  *cur++ = scale * ((*in >> 4) & 0x03);
4482  *cur++ = scale * ((*in >> 2) & 0x03);
4483  *cur++ = scale * ((*in ) & 0x03);
4484  }
4485  if (k > 0) *cur++ = scale * ((*in >> 6) );
4486  if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03);
4487  if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03);
4488  } else if (depth == 1) {
4489  for (k=x*img_n; k >= 8; k-=8, ++in) {
4490  *cur++ = scale * ((*in >> 7) );
4491  *cur++ = scale * ((*in >> 6) & 0x01);
4492  *cur++ = scale * ((*in >> 5) & 0x01);
4493  *cur++ = scale * ((*in >> 4) & 0x01);
4494  *cur++ = scale * ((*in >> 3) & 0x01);
4495  *cur++ = scale * ((*in >> 2) & 0x01);
4496  *cur++ = scale * ((*in >> 1) & 0x01);
4497  *cur++ = scale * ((*in ) & 0x01);
4498  }
4499  if (k > 0) *cur++ = scale * ((*in >> 7) );
4500  if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01);
4501  if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01);
4502  if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01);
4503  if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01);
4504  if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01);
4505  if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01);
4506  }
4507  if (img_n != out_n) {
4508  int q;
4509  // insert alpha = 255
4510  cur = a->out + stride*j;
4511  if (img_n == 1) {
4512  for (q=x-1; q >= 0; --q) {
4513  cur[q*2+1] = 255;
4514  cur[q*2+0] = cur[q];
4515  }
4516  } else {
4517  STBI_ASSERT(img_n == 3);
4518  for (q=x-1; q >= 0; --q) {
4519  cur[q*4+3] = 255;
4520  cur[q*4+2] = cur[q*3+2];
4521  cur[q*4+1] = cur[q*3+1];
4522  cur[q*4+0] = cur[q*3+0];
4523  }
4524  }
4525  }
4526  }
4527  } else if (depth == 16) {
4528  // force the image data from big-endian to platform-native.
4529  // this is done in a separate pass due to the decoding relying
4530  // on the data being untouched, but could probably be done
4531  // per-line during decode if care is taken.
4532  stbi_uc *cur = a->out;
4533  stbi__uint16 *cur16 = (stbi__uint16*)cur;
4534 
4535  for(i=0; i < x*y*out_n; ++i,cur16++,cur+=2) {
4536  *cur16 = (cur[0] << 8) | cur[1];
4537  }
4538  }
4539 
4540  return 1;
4541 }
4542 
4543 static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced)
4544 {
4545  int bytes = (depth == 16 ? 2 : 1);
4546  int out_bytes = out_n * bytes;
4547  stbi_uc *final;
4548  int p;
4549  if (!interlaced)
4550  return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color);
4551 
4552  // de-interlacing
4553  final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0);
4554  for (p=0; p < 7; ++p) {
4555  int xorig[] = { 0,4,0,2,0,1,0 };
4556  int yorig[] = { 0,0,4,0,2,0,1 };
4557  int xspc[] = { 8,8,4,4,2,2,1 };
4558  int yspc[] = { 8,8,8,4,4,2,2 };
4559  int i,j,x,y;
4560  // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1
4561  x = (a->s->img_x - xorig[p] + xspc[p]-1) / xspc[p];
4562  y = (a->s->img_y - yorig[p] + yspc[p]-1) / yspc[p];
4563  if (x && y) {
4564  stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y;
4565  if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) {
4566  STBI_FREE(final);
4567  return 0;
4568  }
4569  for (j=0; j < y; ++j) {
4570  for (i=0; i < x; ++i) {
4571  int out_y = j*yspc[p]+yorig[p];
4572  int out_x = i*xspc[p]+xorig[p];
4573  memcpy(final + out_y*a->s->img_x*out_bytes + out_x*out_bytes,
4574  a->out + (j*x+i)*out_bytes, out_bytes);
4575  }
4576  }
4577  STBI_FREE(a->out);
4578  image_data += img_len;
4579  image_data_len -= img_len;
4580  }
4581  }
4582  a->out = final;
4583 
4584  return 1;
4585 }
4586 
4587 static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int out_n)
4588 {
4589  stbi__context *s = z->s;
4590  stbi__uint32 i, pixel_count = s->img_x * s->img_y;
4591  stbi_uc *p = z->out;
4592 
4593  // compute color-based transparency, assuming we've
4594  // already got 255 as the alpha value in the output
4595  STBI_ASSERT(out_n == 2 || out_n == 4);
4596 
4597  if (out_n == 2) {
4598  for (i=0; i < pixel_count; ++i) {
4599  p[1] = (p[0] == tc[0] ? 0 : 255);
4600  p += 2;
4601  }
4602  } else {
4603  for (i=0; i < pixel_count; ++i) {
4604  if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
4605  p[3] = 0;
4606  p += 4;
4607  }
4608  }
4609  return 1;
4610 }
4611 
4612 static int stbi__compute_transparency16(stbi__png *z, stbi__uint16 tc[3], int out_n)
4613 {
4614  stbi__context *s = z->s;
4615  stbi__uint32 i, pixel_count = s->img_x * s->img_y;
4616  stbi__uint16 *p = (stbi__uint16*) z->out;
4617 
4618  // compute color-based transparency, assuming we've
4619  // already got 65535 as the alpha value in the output
4620  STBI_ASSERT(out_n == 2 || out_n == 4);
4621 
4622  if (out_n == 2) {
4623  for (i = 0; i < pixel_count; ++i) {
4624  p[1] = (p[0] == tc[0] ? 0 : 65535);
4625  p += 2;
4626  }
4627  } else {
4628  for (i = 0; i < pixel_count; ++i) {
4629  if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
4630  p[3] = 0;
4631  p += 4;
4632  }
4633  }
4634  return 1;
4635 }
4636 
4637 static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int pal_img_n)
4638 {
4639  stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y;
4640  stbi_uc *p, *temp_out, *orig = a->out;
4641 
4642  p = (stbi_uc *) stbi__malloc_mad2(pixel_count, pal_img_n, 0);
4643  if (p == NULL) return stbi__err("outofmem", "Out of memory");
4644 
4645  // between here and free(out) below, exitting would leak
4646  temp_out = p;
4647 
4648  if (pal_img_n == 3) {
4649  for (i=0; i < pixel_count; ++i) {
4650  int n = orig[i]*4;
4651  p[0] = palette[n ];
4652  p[1] = palette[n+1];
4653  p[2] = palette[n+2];
4654  p += 3;
4655  }
4656  } else {
4657  for (i=0; i < pixel_count; ++i) {
4658  int n = orig[i]*4;
4659  p[0] = palette[n ];
4660  p[1] = palette[n+1];
4661  p[2] = palette[n+2];
4662  p[3] = palette[n+3];
4663  p += 4;
4664  }
4665  }
4666  STBI_FREE(a->out);
4667  a->out = temp_out;
4668 
4669  STBI_NOTUSED(len);
4670 
4671  return 1;
4672 }
4673 
4674 static int stbi__unpremultiply_on_load = 0;
4675 static int stbi__de_iphone_flag = 0;
4676 
4677 STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
4678 {
4679  stbi__unpremultiply_on_load = flag_true_if_should_unpremultiply;
4680 }
4681 
4682 STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
4683 {
4684  stbi__de_iphone_flag = flag_true_if_should_convert;
4685 }
4686 
4687 static void stbi__de_iphone(stbi__png *z)
4688 {
4689  stbi__context *s = z->s;
4690  stbi__uint32 i, pixel_count = s->img_x * s->img_y;
4691  stbi_uc *p = z->out;
4692 
4693  if (s->img_out_n == 3) { // convert bgr to rgb
4694  for (i=0; i < pixel_count; ++i) {
4695  stbi_uc t = p[0];
4696  p[0] = p[2];
4697  p[2] = t;
4698  p += 3;
4699  }
4700  } else {
4701  STBI_ASSERT(s->img_out_n == 4);
4702  if (stbi__unpremultiply_on_load) {
4703  // convert bgr to rgb and unpremultiply
4704  for (i=0; i < pixel_count; ++i) {
4705  stbi_uc a = p[3];
4706  stbi_uc t = p[0];
4707  if (a) {
4708  stbi_uc half = a / 2;
4709  p[0] = (p[2] * 255 + half) / a;
4710  p[1] = (p[1] * 255 + half) / a;
4711  p[2] = ( t * 255 + half) / a;
4712  } else {
4713  p[0] = p[2];
4714  p[2] = t;
4715  }
4716  p += 4;
4717  }
4718  } else {
4719  // convert bgr to rgb
4720  for (i=0; i < pixel_count; ++i) {
4721  stbi_uc t = p[0];
4722  p[0] = p[2];
4723  p[2] = t;
4724  p += 4;
4725  }
4726  }
4727  }
4728 }
4729 
4730 #define STBI__PNG_TYPE(a,b,c,d) (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d))
4731 
4732 static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
4733 {
4734  stbi_uc palette[1024], pal_img_n=0;
4735  stbi_uc has_trans=0, tc[3];
4736  stbi__uint16 tc16[3];
4737  stbi__uint32 ioff=0, idata_limit=0, i, pal_len=0;
4738  int first=1,k,interlace=0, color=0, is_iphone=0;
4739  stbi__context *s = z->s;
4740 
4741  z->expanded = NULL;
4742  z->idata = NULL;
4743  z->out = NULL;
4744 
4745  if (!stbi__check_png_header(s)) return 0;
4746 
4747  if (scan == STBI__SCAN_type) return 1;
4748 
4749  for (;;) {
4750  stbi__pngchunk c = stbi__get_chunk_header(s);
4751  switch (c.type) {
4752  case STBI__PNG_TYPE('C','g','B','I'):
4753  is_iphone = 1;
4754  stbi__skip(s, c.length);
4755  break;
4756  case STBI__PNG_TYPE('I','H','D','R'): {
4757  int comp,filter;
4758  if (!first) return stbi__err("multiple IHDR","Corrupt PNG");
4759  first = 0;
4760  if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG");
4761  s->img_x = stbi__get32be(s); if (s->img_x > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)");
4762  s->img_y = stbi__get32be(s); if (s->img_y > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)");
4763  z->depth = stbi__get8(s); if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16) return stbi__err("1/2/4/8/16-bit only","PNG not supported: 1/2/4/8/16-bit only");
4764  color = stbi__get8(s); if (color > 6) return stbi__err("bad ctype","Corrupt PNG");
4765  if (color == 3 && z->depth == 16) return stbi__err("bad ctype","Corrupt PNG");
4766  if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG");
4767  comp = stbi__get8(s); if (comp) return stbi__err("bad comp method","Corrupt PNG");
4768  filter= stbi__get8(s); if (filter) return stbi__err("bad filter method","Corrupt PNG");
4769  interlace = stbi__get8(s); if (interlace>1) return stbi__err("bad interlace method","Corrupt PNG");
4770  if (!s->img_x || !s->img_y) return stbi__err("0-pixel image","Corrupt PNG");
4771  if (!pal_img_n) {
4772  s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
4773  if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode");
4774  if (scan == STBI__SCAN_header) return 1;
4775  } else {
4776  // if paletted, then pal_n is our final components, and
4777  // img_n is # components to decompress/filter.
4778  s->img_n = 1;
4779  if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG");
4780  // if SCAN_header, have to scan to see if we have a tRNS
4781  }
4782  break;
4783  }
4784 
4785  case STBI__PNG_TYPE('P','L','T','E'): {
4786  if (first) return stbi__err("first not IHDR", "Corrupt PNG");
4787  if (c.length > 256*3) return stbi__err("invalid PLTE","Corrupt PNG");
4788  pal_len = c.length / 3;
4789  if (pal_len * 3 != c.length) return stbi__err("invalid PLTE","Corrupt PNG");
4790  for (i=0; i < pal_len; ++i) {
4791  palette[i*4+0] = stbi__get8(s);
4792  palette[i*4+1] = stbi__get8(s);
4793  palette[i*4+2] = stbi__get8(s);
4794  palette[i*4+3] = 255;
4795  }
4796  break;
4797  }
4798 
4799  case STBI__PNG_TYPE('t','R','N','S'): {
4800  if (first) return stbi__err("first not IHDR", "Corrupt PNG");
4801  if (z->idata) return stbi__err("tRNS after IDAT","Corrupt PNG");
4802  if (pal_img_n) {
4803  if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; }
4804  if (pal_len == 0) return stbi__err("tRNS before PLTE","Corrupt PNG");
4805  if (c.length > pal_len) return stbi__err("bad tRNS len","Corrupt PNG");
4806  pal_img_n = 4;
4807  for (i=0; i < c.length; ++i)
4808  palette[i*4+3] = stbi__get8(s);
4809  } else {
4810  if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG");
4811  if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG");
4812  has_trans = 1;
4813  if (z->depth == 16) {
4814  for (k = 0; k < s->img_n; ++k) tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is
4815  } else {
4816  for (k = 0; k < s->img_n; ++k) tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger
4817  }
4818  }
4819  break;
4820  }
4821 
4822  case STBI__PNG_TYPE('g','A','M','A'): {
4823  if (first) return stbi__err("first not IHDR", "Corrupt PNG");
4824  if (4 != c.length) return stbi__err("invalid gAMA","Corrupt PNG");
4825  z->gamma = stbi__get32be(s) / 100000.0f;
4826  break;
4827  }
4828 
4829  case STBI__PNG_TYPE('I','D','A','T'): {
4830  if (first) return stbi__err("first not IHDR", "Corrupt PNG");
4831  if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG");
4832  if (scan == STBI__SCAN_header) { s->img_n = pal_img_n; return 1; }
4833  if ((int)(ioff + c.length) < (int)ioff) return 0;
4834  if (ioff + c.length > idata_limit) {
4835  stbi__uint32 idata_limit_old = idata_limit;
4836  stbi_uc *p;
4837  if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096;
4838  while (ioff + c.length > idata_limit)
4839  idata_limit *= 2;
4840  STBI_NOTUSED(idata_limit_old);
4841  p = (stbi_uc *) STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory");
4842  z->idata = p;
4843  }
4844  if (!stbi__getn(s, z->idata+ioff,c.length)) return stbi__err("outofdata","Corrupt PNG");
4845  ioff += c.length;
4846  break;
4847  }
4848 
4849  case STBI__PNG_TYPE('I','E','N','D'): {
4850  stbi__uint32 raw_len, bpl;
4851  if (first) return stbi__err("first not IHDR", "Corrupt PNG");
4852  if (scan != STBI__SCAN_load) return 1;
4853  if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG");
4854  // initial guess for decoded data size to avoid unnecessary reallocs
4855  bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component
4856  raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */;
4857  z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, raw_len, (int *) &raw_len, !is_iphone);
4858  if (z->expanded == NULL) return 0; // zlib should set error
4859  STBI_FREE(z->idata); z->idata = NULL;
4860  if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans)
4861  s->img_out_n = s->img_n+1;
4862  else
4863  s->img_out_n = s->img_n;
4864  if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace)) return 0;
4865  if (has_trans) {
4866  if (z->depth == 16) {
4867  if (!stbi__compute_transparency16(z, tc16, s->img_out_n)) return 0;
4868  } else {
4869  if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0;
4870  }
4871  }
4872  if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2)
4873  stbi__de_iphone(z);
4874  if (pal_img_n) {
4875  // pal_img_n == 3 or 4
4876  s->img_n = pal_img_n; // record the actual colors we had
4877  s->img_out_n = pal_img_n;
4878  if (req_comp >= 3) s->img_out_n = req_comp;
4879  if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n))
4880  return 0;
4881  } else if (has_trans) {
4882  // non-paletted image with tRNS -> source image has (constant) alpha
4883  ++s->img_n;
4884  }
4885  STBI_FREE(z->expanded); z->expanded = NULL;
4886  return 1;
4887  }
4888 
4889  default:
4890  // if critical, fail
4891  if (first) return stbi__err("first not IHDR", "Corrupt PNG");
4892  if ((c.type & (1 << 29)) == 0) {
4893  #ifndef STBI_NO_FAILURE_STRINGS
4894  // not threadsafe
4895  static char invalid_chunk[] = "XXXX PNG chunk not known";
4896  invalid_chunk[0] = STBI__BYTECAST(c.type >> 24);
4897  invalid_chunk[1] = STBI__BYTECAST(c.type >> 16);
4898  invalid_chunk[2] = STBI__BYTECAST(c.type >> 8);
4899  invalid_chunk[3] = STBI__BYTECAST(c.type >> 0);
4900  #endif
4901  return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type");
4902  }
4903  stbi__skip(s, c.length);
4904  break;
4905  }
4906  // end of PNG chunk, read and skip CRC
4907  stbi__get32be(s);
4908  }
4909 }
4910 
4911 // XXX PIXAR:
4912 // Helper functions for associating alpha values with colors
4913 static void pxr__associate_alpha(unsigned char *data, int x, int y, int n)
4914 {
4915  STBI_ASSERT(n == 4);
4916  stbi__uint32 i, pixel_count = x * y;
4917  for (i=0; i < pixel_count; ++i) {
4918  float alpha = (float)data[3] / 255.0f;
4919  if (alpha < 1.0) {
4920  data[0] = data[0] * alpha + 0.5;
4921  data[1] = data[1] * alpha + 0.5;
4922  data[2] = data[2] * alpha + 0.5;
4923  }
4924  data += 4;
4925  }
4926 }
4927 
4928 static void pxr__associate_alpha16(stbi__uint16 *data, int x, int y, int n)
4929 {
4930  STBI_ASSERT(n == 4);
4931  stbi__uint32 i, pixel_count = x * y;
4932  for (i=0; i < pixel_count; ++i) {
4933  float alpha = (float)data[3] / 65535.0f;
4934  if (alpha < 1.0) {
4935  data[0] = data[0] * alpha + 0.5;
4936  data[1] = data[1] * alpha + 0.5;
4937  data[2] = data[2] * alpha + 0.5;
4938  }
4939  data += 4;
4940  }
4941 }
4942 
4943 static void *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp, stbi__result_info *ri)
4944 {
4945  void *result=NULL;
4946  if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
4947  if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) {
4948  if (p->depth < 8)
4949  ri->bits_per_channel = 8;
4950  else
4951  ri->bits_per_channel = p->depth;
4952  result = p->out;
4953  p->out = NULL;
4954  if (req_comp && req_comp != p->s->img_out_n) {
4955  if (ri->bits_per_channel == 8)
4956  result = stbi__convert_format((unsigned char *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
4957  else
4958  result = stbi__convert_format16((stbi__uint16 *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
4959  p->s->img_out_n = req_comp;
4960  if (result == NULL) return result;
4961  }
4962  *x = p->s->img_x;
4963  *y = p->s->img_y;
4964  if (n) *n = p->s->img_n;
4965 
4966  // XXX PIXAR:
4967  // Associate alpha to accommodate client code.
4968  if (p->s->img_n == 4) {
4969  if (ri->bits_per_channel == 8)
4970  pxr__associate_alpha((unsigned char *) result, *x, *y, p->s->img_n);
4971  else
4972  pxr__associate_alpha16((stbi__uint16 *) result, *x, *y, p->s->img_n);
4973  }
4974  }
4975  STBI_FREE(p->out); p->out = NULL;
4976  STBI_FREE(p->expanded); p->expanded = NULL;
4977  STBI_FREE(p->idata); p->idata = NULL;
4978 
4979  return result;
4980 }
4981 
4982 static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
4983 {
4984  stbi__png p;
4985  p.s = s;
4986  return stbi__do_png(&p, x,y,comp,req_comp, ri);
4987 }
4988 
4989 static int stbi__png_test(stbi__context *s)
4990 {
4991  int r;
4992  r = stbi__check_png_header(s);
4993  stbi__rewind(s);
4994  return r;
4995 }
4996 
4997 static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp, float *gamma)
4998 {
4999  if (!stbi__parse_png_file(p, STBI__SCAN_header, 0)) {
5000  stbi__rewind( p->s );
5001  return 0;
5002  }
5003  if (x) *x = p->s->img_x;
5004  if (y) *y = p->s->img_y;
5005  if (comp) *comp = p->s->img_n;
5006  if (gamma) *gamma = p->gamma;
5007  return 1;
5008 }
5009 
5010 static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp, float *gamma)
5011 {
5012  stbi__png p;
5013  p.s = s;
5014  return stbi__png_info_raw(&p, x, y, comp, gamma);
5015 }
5016 
5017 static int stbi__png_is16(stbi__context *s)
5018 {
5019  stbi__png p;
5020  p.s = s;
5021  if (!stbi__png_info_raw(&p, NULL, NULL, NULL, NULL))
5022  return 0;
5023  if (p.depth != 16) {
5024  stbi__rewind(p.s);
5025  return 0;
5026  }
5027  return 1;
5028 }
5029 #endif
5030 
5031 // Microsoft/Windows BMP image
5032 
5033 #ifndef STBI_NO_BMP
5034 static int stbi__bmp_test_raw(stbi__context *s)
5035 {
5036  int r;
5037  int sz;
5038  if (stbi__get8(s) != 'B') return 0;
5039  if (stbi__get8(s) != 'M') return 0;
5040  stbi__get32le(s); // discard filesize
5041  stbi__get16le(s); // discard reserved
5042  stbi__get16le(s); // discard reserved
5043  stbi__get32le(s); // discard data offset
5044  sz = stbi__get32le(s);
5045  r = (sz == 12 || sz == 40 || sz == 56 || sz == 108 || sz == 124);
5046  return r;
5047 }
5048 
5049 static int stbi__bmp_test(stbi__context *s)
5050 {
5051  int r = stbi__bmp_test_raw(s);
5052  stbi__rewind(s);
5053  return r;
5054 }
5055 
5056 
// Index (0..31) of the highest set bit of z, or -1 when z is 0.
// Binary search: at each step, if anything is set at or above bit 'step',
// record the step and shift it away.
static int stbi__high_bit(unsigned int z)
{
   int pos = 0;
   int step;

   if (z == 0) return -1;
   for (step = 16; step > 0; step >>= 1) {
      if ((z >> step) != 0) {
         pos += step;
         z >>= step;
      }
   }
   return pos;
}
5069 
// Number of set bits in a, computed by summing adjacent fields of doubling
// width (2, 4, 8 bits), then folding the per-byte counts into the low byte.
static int stbi__bitcount(unsigned int a)
{
   unsigned int pairs, nibbles, bytes, halves, total;

   pairs   = (a & 0x55555555) + ((a >> 1) & 0x55555555);           // max 2 per 2 bits
   nibbles = (pairs & 0x33333333) + ((pairs >> 2) & 0x33333333);   // max 4 per 4 bits
   bytes   = (nibbles + (nibbles >> 4)) & 0x0f0f0f0f;              // max 8 per 8 bits
   halves  = bytes + (bytes >> 8);                                 // max 16 in each low byte
   total   = halves + (halves >> 16);                              // max 32 in the low byte
   return total & 0xff;
}
5079 
// Extract an arbitrarily-aligned N-bit field (N = bits) from v and expand it
// to the full 8-bit range by replicating its bits.
//
//   v      the pixel value already ANDed with the channel mask
//   shift  right-shift that puts the field's top bit at bit 7 (negative = shift left)
//   bits   width of the field, 0..8
//
// Returns a value in 0..255. A bits value outside 0..8 trips the assertion; with
// assertions disabled it yields 0 instead of indexing outside the tables.
//
// The arithmetic is done on an unsigned copy of v: a mask that includes bit 31
// (e.g. 0xff000000) makes v negative as an int, and shifting that right would
// smear the sign bit across the field and fail the range assertion.
static int stbi__shiftsigned(int v, int shift, int bits)
{
   static const unsigned int mul_table[9] = {
      0,
      0xff/*0b11111111*/, 0x55/*0b01010101*/, 0x49/*0b01001001*/, 0x11/*0b00010001*/,
      0x21/*0b00100001*/, 0x41/*0b01000001*/, 0x81/*0b10000001*/, 0x01/*0b00000001*/,
   };
   static const unsigned int shift_table[9] = {
      0, 0,0,1,0,2,4,6,0,
   };
   unsigned int uv = (unsigned int) v;

   // validate bits before it is used as a shift count or table index
   STBI_ASSERT(bits >= 0 && bits <= 8);
   if (bits < 0 || bits > 8) return 0;

   if (shift < 0)
      uv <<= -shift;
   else
      uv >>= shift;
   STBI_ASSERT(uv < 256);
   uv >>= (8-bits);   // keep only the 'bits' significant bits
   return (int) (uv * mul_table[bits]) >> shift_table[bits];
}
5102 
// Values gathered from a BMP header by stbi__bmp_parse_header.
typedef struct
{
   int bpp;              // bits per pixel field of the header
   int offset;           // data offset field of the file header
   int hsz;              // info-header size (12, 40, 56, 108 or 124)
   unsigned int mr;      // red channel bit mask
   unsigned int mg;      // green channel bit mask
   unsigned int mb;      // blue channel bit mask
   unsigned int ma;      // alpha channel bit mask (0 = none)
   unsigned int all_a;   // OR of the alpha values seen while loading
} stbi__bmp_data;
5108 
5109 static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
5110 {
5111  int hsz;
5112  if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return stbi__errpuc("not BMP", "Corrupt BMP");
5113  stbi__get32le(s); // discard filesize
5114  stbi__get16le(s); // discard reserved
5115  stbi__get16le(s); // discard reserved
5116  info->offset = stbi__get32le(s);
5117  info->hsz = hsz = stbi__get32le(s);
5118  info->mr = info->mg = info->mb = info->ma = 0;
5119 
5120  if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) return stbi__errpuc("unknown BMP", "BMP type not supported: unknown");
5121  if (hsz == 12) {
5122  s->img_x = stbi__get16le(s);
5123  s->img_y = stbi__get16le(s);
5124  } else {
5125  s->img_x = stbi__get32le(s);
5126  s->img_y = stbi__get32le(s);
5127  }
5128  if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP");
5129  info->bpp = stbi__get16le(s);
5130  if (hsz != 12) {
5131  int compress = stbi__get32le(s);
5132  if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE");
5133  stbi__get32le(s); // discard sizeof
5134  stbi__get32le(s); // discard hres
5135  stbi__get32le(s); // discard vres
5136  stbi__get32le(s); // discard colorsused
5137  stbi__get32le(s); // discard max important
5138  if (hsz == 40 || hsz == 56) {
5139  if (hsz == 56) {
5140  stbi__get32le(s);
5141  stbi__get32le(s);
5142  stbi__get32le(s);
5143  stbi__get32le(s);
5144  }
5145  if (info->bpp == 16 || info->bpp == 32) {
5146  if (compress == 0) {
5147  if (info->bpp == 32) {
5148  info->mr = 0xffu << 16;
5149  info->mg = 0xffu << 8;
5150  info->mb = 0xffu << 0;
5151  info->ma = 0xffu << 24;
5152  info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0
5153  } else {
5154  info->mr = 31u << 10;
5155  info->mg = 31u << 5;
5156  info->mb = 31u << 0;
5157  }
5158  } else if (compress == 3) {
5159  info->mr = stbi__get32le(s);
5160  info->mg = stbi__get32le(s);
5161  info->mb = stbi__get32le(s);
5162  // not documented, but generated by photoshop and handled by mspaint
5163  if (info->mr == info->mg && info->mg == info->mb) {
5164  // ?!?!?
5165  return stbi__errpuc("bad BMP", "bad BMP");
5166  }
5167  } else
5168  return stbi__errpuc("bad BMP", "bad BMP");
5169  }
5170  } else {
5171  int i;
5172  if (hsz != 108 && hsz != 124)
5173  return stbi__errpuc("bad BMP", "bad BMP");
5174  info->mr = stbi__get32le(s);
5175  info->mg = stbi__get32le(s);
5176  info->mb = stbi__get32le(s);
5177  info->ma = stbi__get32le(s);
5178  stbi__get32le(s); // discard color space
5179  for (i=0; i < 12; ++i)
5180  stbi__get32le(s); // discard color space parameters
5181  if (hsz == 124) {
5182  stbi__get32le(s); // discard rendering intent
5183  stbi__get32le(s); // discard offset of profile data
5184  stbi__get32le(s); // discard size of profile data
5185  stbi__get32le(s); // discard reserved
5186  }
5187  }
5188  }
5189  return (void *) 1;
5190 }
5191 
5192 
5193 static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
5194 {
5195  stbi_uc *out;
5196  unsigned int mr=0,mg=0,mb=0,ma=0, all_a;
5197  stbi_uc pal[256][4];
5198  int psize=0,i,j,width;
5199  int flip_vertically, pad, target;
5200  stbi__bmp_data info;
5201  STBI_NOTUSED(ri);
5202 
5203  info.all_a = 255;
5204  if (stbi__bmp_parse_header(s, &info) == NULL)
5205  return NULL; // error code already set
5206 
5207  flip_vertically = ((int) s->img_y) > 0;
5208  s->img_y = abs((int) s->img_y);
5209 
5210  mr = info.mr;
5211  mg = info.mg;
5212  mb = info.mb;
5213  ma = info.ma;
5214  all_a = info.all_a;
5215 
5216  if (info.hsz == 12) {
5217  if (info.bpp < 24)
5218  psize = (info.offset - 14 - 24) / 3;
5219  } else {
5220  if (info.bpp < 16)
5221  psize = (info.offset - 14 - info.hsz) >> 2;
5222  }
5223 
5224  s->img_n = ma ? 4 : 3;
5225  if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
5226  target = req_comp;
5227  else
5228  target = s->img_n; // if they want monochrome, we'll post-convert
5229 
5230  // sanity-check size
5231  if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0))
5232  return stbi__errpuc("too large", "Corrupt BMP");
5233 
5234  out = (stbi_uc *) stbi__malloc_mad3(target, s->img_x, s->img_y, 0);
5235  if (!out) return stbi__errpuc("outofmem", "Out of memory");
5236  if (info.bpp < 16) {
5237  int z=0;
5238  if (psize == 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); }
5239  for (i=0; i < psize; ++i) {
5240  pal[i][2] = stbi__get8(s);
5241  pal[i][1] = stbi__get8(s);
5242  pal[i][0] = stbi__get8(s);
5243  if (info.hsz != 12) stbi__get8(s);
5244  pal[i][3] = 255;
5245  }
5246  stbi__skip(s, info.offset - 14 - info.hsz - psize * (info.hsz == 12 ? 3 : 4));
5247  if (info.bpp == 1) width = (s->img_x + 7) >> 3;
5248  else if (info.bpp == 4) width = (s->img_x + 1) >> 1;
5249  else if (info.bpp == 8) width = s->img_x;
5250  else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); }
5251  pad = (-width)&3;
5252  if (info.bpp == 1) {
5253  for (j=0; j < (int) s->img_y; ++j) {
5254  int bit_offset = 7, v = stbi__get8(s);
5255  for (i=0; i < (int) s->img_x; ++i) {
5256  int color = (v>>bit_offset)&0x1;
5257  out[z++] = pal[color][0];
5258  out[z++] = pal[color][1];
5259  out[z++] = pal[color][2];
5260  if((--bit_offset) < 0) {
5261  bit_offset = 7;
5262  v = stbi__get8(s);
5263  }
5264  }
5265  stbi__skip(s, pad);
5266  }
5267  } else {
5268  for (j=0; j < (int) s->img_y; ++j) {
5269  for (i=0; i < (int) s->img_x; i += 2) {
5270  int v=stbi__get8(s),v2=0;
5271  if (info.bpp == 4) {
5272  v2 = v & 15;
5273  v >>= 4;
5274  }
5275  out[z++] = pal[v][0];
5276  out[z++] = pal[v][1];
5277  out[z++] = pal[v][2];
5278  if (target == 4) out[z++] = 255;
5279  if (i+1 == (int) s->img_x) break;
5280  v = (info.bpp == 8) ? stbi__get8(s) : v2;
5281  out[z++] = pal[v][0];
5282  out[z++] = pal[v][1];
5283  out[z++] = pal[v][2];
5284  if (target == 4) out[z++] = 255;
5285  }
5286  stbi__skip(s, pad);
5287  }
5288  }
5289  } else {
5290  int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0;
5291  int z = 0;
5292  int easy=0;
5293  stbi__skip(s, info.offset - 14 - info.hsz);
5294  if (info.bpp == 24) width = 3 * s->img_x;
5295  else if (info.bpp == 16) width = 2*s->img_x;
5296  else /* bpp = 32 and pad = 0 */ width=0;
5297  pad = (-width) & 3;
5298  if (info.bpp == 24) {
5299  easy = 1;
5300  } else if (info.bpp == 32) {
5301  if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000)
5302  easy = 2;
5303  }
5304  if (!easy) {
5305  if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); }
5306  // right shift amt to put high bit in position #7
5307  rshift = stbi__high_bit(mr)-7; rcount = stbi__bitcount(mr);
5308  gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg);
5309  bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb);
5310  ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma);
5311  }
5312  for (j=0; j < (int) s->img_y; ++j) {
5313  if (easy) {
5314  for (i=0; i < (int) s->img_x; ++i) {
5315  unsigned char a;
5316  out[z+2] = stbi__get8(s);
5317  out[z+1] = stbi__get8(s);
5318  out[z+0] = stbi__get8(s);
5319  z += 3;
5320  a = (easy == 2 ? stbi__get8(s) : 255);
5321  all_a |= a;
5322  if (target == 4) out[z++] = a;
5323  }
5324  } else {
5325  int bpp = info.bpp;
5326  for (i=0; i < (int) s->img_x; ++i) {
5327  stbi__uint32 v = (bpp == 16 ? (stbi__uint32) stbi__get16le(s) : stbi__get32le(s));
5328  unsigned int a;
5329  out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount));
5330  out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount));
5331  out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount));
5332  a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255);
5333  all_a |= a;
5334  if (target == 4) out[z++] = STBI__BYTECAST(a);
5335  }
5336  }
5337  stbi__skip(s, pad);
5338  }
5339  }
5340 
5341  // if alpha channel is all 0s, replace with all 255s
5342  if (target == 4 && all_a == 0)
5343  for (i=4*s->img_x*s->img_y-1; i >= 0; i -= 4)
5344  out[i] = 255;
5345 
5346  if (flip_vertically) {
5347  stbi_uc t;
5348  for (j=0; j < (int) s->img_y>>1; ++j) {
5349  stbi_uc *p1 = out + j *s->img_x*target;
5350  stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target;
5351  for (i=0; i < (int) s->img_x*target; ++i) {
5352  t = p1[i], p1[i] = p2[i], p2[i] = t;
5353  }
5354  }
5355  }
5356 
5357  if (req_comp && req_comp != target) {
5358  out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y);
5359  if (out == NULL) return out; // stbi__convert_format frees input on failure
5360  }
5361 
5362  *x = s->img_x;
5363  *y = s->img_y;
5364  if (comp) *comp = s->img_n;
5365  return out;
5366 }
5367 #endif
5368 
5369 // Targa Truevision - TGA
5370 // by Jonathan Dummer
5371 #ifndef STBI_NO_TGA
5372 // returns STBI_rgb or whatever, 0 on error
5373 static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16)
5374 {
5375  // only RGB or RGBA (incl. 16bit) or grey allowed
5376  if (is_rgb16) *is_rgb16 = 0;
5377  switch(bits_per_pixel) {
5378  case 8: return STBI_grey;
5379  case 16: if(is_grey) return STBI_grey_alpha;
5380  // fallthrough
5381  case 15: if(is_rgb16) *is_rgb16 = 1;
5382  return STBI_rgb;
5383  case 24: // fallthrough
5384  case 32: return bits_per_pixel/8;
5385  default: return 0;
5386  }
5387 }
5388 
5389 static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp)
5390 {
5391  int tga_w, tga_h, tga_comp, tga_image_type, tga_bits_per_pixel, tga_colormap_bpp;
5392  int sz, tga_colormap_type;
5393  stbi__get8(s); // discard Offset
5394  tga_colormap_type = stbi__get8(s); // colormap type
5395  if( tga_colormap_type > 1 ) {
5396  stbi__rewind(s);
5397  return 0; // only RGB or indexed allowed
5398  }
5399  tga_image_type = stbi__get8(s); // image type
5400  if ( tga_colormap_type == 1 ) { // colormapped (paletted) image
5401  if (tga_image_type != 1 && tga_image_type != 9) {
5402  stbi__rewind(s);
5403  return 0;
5404  }
5405  stbi__skip(s,4); // skip index of first colormap entry and number of entries
5406  sz = stbi__get8(s); // check bits per palette color entry
5407  if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) {
5408  stbi__rewind(s);
5409  return 0;
5410  }
5411  stbi__skip(s,4); // skip image x and y origin
5412  tga_colormap_bpp = sz;
5413  } else { // "normal" image w/o colormap - only RGB or grey allowed, +/- RLE
5414  if ( (tga_image_type != 2) && (tga_image_type != 3) && (tga_image_type != 10) && (tga_image_type != 11) ) {
5415  stbi__rewind(s);
5416  return 0; // only RGB or grey allowed, +/- RLE
5417  }
5418  stbi__skip(s,9); // skip colormap specification and image x/y origin
5419  tga_colormap_bpp = 0;
5420  }
5421  tga_w = stbi__get16le(s);
5422  if( tga_w < 1 ) {
5423  stbi__rewind(s);
5424  return 0; // test width
5425  }
5426  tga_h = stbi__get16le(s);
5427  if( tga_h < 1 ) {
5428  stbi__rewind(s);
5429  return 0; // test height
5430  }
5431  tga_bits_per_pixel = stbi__get8(s); // bits per pixel
5432  stbi__get8(s); // ignore alpha bits
5433  if (tga_colormap_bpp != 0) {
5434  if((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16)) {
5435  // when using a colormap, tga_bits_per_pixel is the size of the indexes
5436  // I don't think anything but 8 or 16bit indexes makes sense
5437  stbi__rewind(s);
5438  return 0;
5439  }
5440  tga_comp = stbi__tga_get_comp(tga_colormap_bpp, 0, NULL);
5441  } else {
5442  tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3) || (tga_image_type == 11), NULL);
5443  }
5444  if(!tga_comp) {
5445  stbi__rewind(s);
5446  return 0;
5447  }
5448  if (x) *x = tga_w;
5449  if (y) *y = tga_h;
5450  if (comp) *comp = tga_comp;
5451  return 1; // seems to have passed everything
5452 }
5453 
5454 static int stbi__tga_test(stbi__context *s)
5455 {
5456  int res = 0;
5457  int sz, tga_color_type;
5458  stbi__get8(s); // discard Offset
5459  tga_color_type = stbi__get8(s); // color type
5460  if ( tga_color_type > 1 ) goto errorEnd; // only RGB or indexed allowed
5461  sz = stbi__get8(s); // image type
5462  if ( tga_color_type == 1 ) { // colormapped (paletted) image
5463  if (sz != 1 && sz != 9) goto errorEnd; // colortype 1 demands image type 1 or 9
5464  stbi__skip(s,4); // skip index of first colormap entry and number of entries
5465  sz = stbi__get8(s); // check bits per palette color entry
5466  if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd;
5467  stbi__skip(s,4); // skip image x and y origin
5468  } else { // "normal" image w/o colormap
5469  if ( (sz != 2) && (sz != 3) && (sz != 10) && (sz != 11) ) goto errorEnd; // only RGB or grey allowed, +/- RLE
5470  stbi__skip(s,9); // skip colormap specification and image x/y origin
5471  }
5472  if ( stbi__get16le(s) < 1 ) goto errorEnd; // test width
5473  if ( stbi__get16le(s) < 1 ) goto errorEnd; // test height
5474  sz = stbi__get8(s); // bits per pixel
5475  if ( (tga_color_type == 1) && (sz != 8) && (sz != 16) ) goto errorEnd; // for colormapped images, bpp is size of an index
5476  if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd;
5477 
5478  res = 1; // if we got this far, everything's good and we can return 1 instead of 0
5479 
5480 errorEnd:
5481  stbi__rewind(s);
5482  return res;
5483 }
5484 
5485 // read 16bit value and convert to 24bit RGB
5486 static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out)
5487 {
5488  stbi__uint16 px = (stbi__uint16)stbi__get16le(s);
5489  stbi__uint16 fiveBitMask = 31;
5490  // we have 3 channels with 5bits each
5491  int r = (px >> 10) & fiveBitMask;
5492  int g = (px >> 5) & fiveBitMask;
5493  int b = px & fiveBitMask;
5494  // Note that this saves the data in RGB(A) order, so it doesn't need to be swapped later
5495  out[0] = (stbi_uc)((r * 255)/31);
5496  out[1] = (stbi_uc)((g * 255)/31);
5497  out[2] = (stbi_uc)((b * 255)/31);
5498 
5499  // some people claim that the most significant bit might be used for alpha
5500  // (possibly if an alpha-bit is set in the "image descriptor byte")
5501  // but that only made 16bit test images completely translucent..
5502  // so let's treat all 15 and 16bit TGAs as RGB with no alpha.
5503 }
5504 
5505 static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
5506 {
5507  // read in the TGA header stuff
5508  int tga_offset = stbi__get8(s);
5509  int tga_indexed = stbi__get8(s);
5510  int tga_image_type = stbi__get8(s);
5511  int tga_is_RLE = 0;
5512  int tga_palette_start = stbi__get16le(s);
5513  int tga_palette_len = stbi__get16le(s);
5514  int tga_palette_bits = stbi__get8(s);
5515  int tga_x_origin = stbi__get16le(s);
5516  int tga_y_origin = stbi__get16le(s);
5517  int tga_width = stbi__get16le(s);
5518  int tga_height = stbi__get16le(s);
5519  int tga_bits_per_pixel = stbi__get8(s);
5520  int tga_comp, tga_rgb16=0;
5521  int tga_inverted = stbi__get8(s);
5522  // int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused (useless?)
5523  // image data
5524  unsigned char *tga_data;
5525  unsigned char *tga_palette = NULL;
5526  int i, j;
5527  unsigned char raw_data[4] = {0};
5528  int RLE_count = 0;
5529  int RLE_repeating = 0;
5530  int read_next_pixel = 1;
5531  STBI_NOTUSED(ri);
5532 
5533  // do a tiny bit of precessing
5534  if ( tga_image_type >= 8 )
5535  {
5536  tga_image_type -= 8;
5537  tga_is_RLE = 1;
5538  }
5539  tga_inverted = 1 - ((tga_inverted >> 5) & 1);
5540 
5541  // If I'm paletted, then I'll use the number of bits from the palette
5542  if ( tga_indexed ) tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16);
5543  else tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16);
5544 
5545  if(!tga_comp) // shouldn't really happen, stbi__tga_test() should have ensured basic consistency
5546  return stbi__errpuc("bad format", "Can't find out TGA pixelformat");
5547 
5548  // tga info
5549  *x = tga_width;
5550  *y = tga_height;
5551  if (comp) *comp = tga_comp;
5552 
5553  if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0))
5554  return stbi__errpuc("too large", "Corrupt TGA");
5555 
5556  tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0);
5557  if (!tga_data) return stbi__errpuc("outofmem", "Out of memory");
5558 
5559  // skip to the data's starting position (offset usually = 0)
5560  stbi__skip(s, tga_offset );
5561 
5562  if ( !tga_indexed && !tga_is_RLE && !tga_rgb16 ) {
5563  for (i=0; i < tga_height; ++i) {
5564  int row = tga_inverted ? tga_height -i - 1 : i;
5565  stbi_uc *tga_row = tga_data + row*tga_width*tga_comp;
5566  stbi__getn(s, tga_row, tga_width * tga_comp);
5567  }
5568  } else {
5569  // do I need to load a palette?
5570  if ( tga_indexed)
5571  {
5572  // any data to skip? (offset usually = 0)
5573  stbi__skip(s, tga_palette_start );
5574  // load the palette
5575  tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0);
5576  if (!tga_palette) {
5577  STBI_FREE(tga_data);
5578  return stbi__errpuc("outofmem", "Out of memory");
5579  }
5580  if (tga_rgb16) {
5581  stbi_uc *pal_entry = tga_palette;
5582  STBI_ASSERT(tga_comp == STBI_rgb);
5583  for (i=0; i < tga_palette_len; ++i) {
5584  stbi__tga_read_rgb16(s, pal_entry);
5585  pal_entry += tga_comp;
5586  }
5587  } else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) {
5588  STBI_FREE(tga_data);
5589  STBI_FREE(tga_palette);
5590  return stbi__errpuc("bad palette", "Corrupt TGA");
5591  }
5592  }
5593  // load the data
5594  for (i=0; i < tga_width * tga_height; ++i)
5595  {
5596  // if I'm in RLE mode, do I need to get a RLE stbi__pngchunk?
5597  if ( tga_is_RLE )
5598  {
5599  if ( RLE_count == 0 )
5600  {
5601  // yep, get the next byte as a RLE command
5602  int RLE_cmd = stbi__get8(s);
5603  RLE_count = 1 + (RLE_cmd & 127);
5604  RLE_repeating = RLE_cmd >> 7;
5605  read_next_pixel = 1;
5606  } else if ( !RLE_repeating )
5607  {
5608  read_next_pixel = 1;
5609  }
5610  } else
5611  {
5612  read_next_pixel = 1;
5613  }
5614  // OK, if I need to read a pixel, do it now
5615  if ( read_next_pixel )
5616  {
5617  // load however much data we did have
5618  if ( tga_indexed )
5619  {
5620  // read in index, then perform the lookup
5621  int pal_idx = (tga_bits_per_pixel == 8) ? stbi__get8(s) : stbi__get16le(s);
5622  if ( pal_idx >= tga_palette_len ) {
5623  // invalid index
5624  pal_idx = 0;
5625  }
5626  pal_idx *= tga_comp;
5627  for (j = 0; j < tga_comp; ++j) {
5628  raw_data[j] = tga_palette[pal_idx+j];
5629  }
5630  } else if(tga_rgb16) {
5631  STBI_ASSERT(tga_comp == STBI_rgb);
5632  stbi__tga_read_rgb16(s, raw_data);
5633  } else {
5634  // read in the data raw
5635  for (j = 0; j < tga_comp; ++j) {
5636  raw_data[j] = stbi__get8(s);
5637  }
5638  }
5639  // clear the reading flag for the next pixel
5640  read_next_pixel = 0;
5641  } // end of reading a pixel
5642 
5643  // copy data
5644  for (j = 0; j < tga_comp; ++j)
5645  tga_data[i*tga_comp+j] = raw_data[j];
5646 
5647  // in case we're in RLE mode, keep counting down
5648  --RLE_count;
5649  }
5650  // do I need to invert the image?
5651  if ( tga_inverted )
5652  {
5653  for (j = 0; j*2 < tga_height; ++j)
5654  {
5655  int index1 = j * tga_width * tga_comp;
5656  int index2 = (tga_height - 1 - j) * tga_width * tga_comp;
5657  for (i = tga_width * tga_comp; i > 0; --i)
5658  {
5659  unsigned char temp = tga_data[index1];
5660  tga_data[index1] = tga_data[index2];
5661  tga_data[index2] = temp;
5662  ++index1;
5663  ++index2;
5664  }
5665  }
5666  }
5667  // clear my palette, if I had one
5668  if ( tga_palette != NULL )
5669  {
5670  STBI_FREE( tga_palette );
5671  }
5672  }
5673 
5674  // swap RGB - if the source data was RGB16, it already is in the right order
5675  if (tga_comp >= 3 && !tga_rgb16)
5676  {
5677  unsigned char* tga_pixel = tga_data;
5678  for (i=0; i < tga_width * tga_height; ++i)
5679  {
5680  unsigned char temp = tga_pixel[0];
5681  tga_pixel[0] = tga_pixel[2];
5682  tga_pixel[2] = temp;
5683  tga_pixel += tga_comp;
5684  }
5685  }
5686 
5687  // convert to target component count
5688  if (req_comp && req_comp != tga_comp)
5689  tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height);
5690 
5691  // the things I do to get rid of an error message, and yet keep
5692  // Microsoft's C compilers happy... [8^(
5693  tga_palette_start = tga_palette_len = tga_palette_bits =
5694  tga_x_origin = tga_y_origin = 0;
5695  // OK, done
5696  return tga_data;
5697 }
5698 #endif
5699 
5700 // *************************************************************************************************
5701 // Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB
5702 
5703 #ifndef STBI_NO_PSD
5704 static int stbi__psd_test(stbi__context *s)
5705 {
5706  int r = (stbi__get32be(s) == 0x38425053);
5707  stbi__rewind(s);
5708  return r;
5709 }
5710 
5711 static int stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelCount)
5712 {
5713  int count, nleft, len;
5714 
5715  count = 0;
5716  while ((nleft = pixelCount - count) > 0) {
5717  len = stbi__get8(s);
5718  if (len == 128) {
5719  // No-op.
5720  } else if (len < 128) {
5721  // Copy next len+1 bytes literally.
5722  len++;
5723  if (len > nleft) return 0; // corrupt data
5724  count += len;
5725  while (len) {
5726  *p = stbi__get8(s);
5727  p += 4;
5728  len--;
5729  }
5730  } else if (len > 128) {
5731  stbi_uc val;
5732  // Next -len+1 bytes in the dest are replicated from next source byte.
5733  // (Interpret len as a negative 8-bit int.)
5734  len = 257 - len;
5735  if (len > nleft) return 0; // corrupt data
5736  val = stbi__get8(s);
5737  count += len;
5738  while (len) {
5739  *p = val;
5740  p += 4;
5741  len--;
5742  }
5743  }
5744  }
5745 
5746  return 1;
5747 }
5748 
5749 static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
5750 {
5751  int pixelCount;
5752  int channelCount, compression;
5753  int channel, i;
5754  int bitdepth;
5755  int w,h;
5756  stbi_uc *out;
5757  STBI_NOTUSED(ri);
5758 
5759  // Check identifier
5760  if (stbi__get32be(s) != 0x38425053) // "8BPS"
5761  return stbi__errpuc("not PSD", "Corrupt PSD image");
5762 
5763  // Check file type version.
5764  if (stbi__get16be(s) != 1)
5765  return stbi__errpuc("wrong version", "Unsupported version of PSD image");
5766 
5767  // Skip 6 reserved bytes.
5768  stbi__skip(s, 6 );
5769 
5770  // Read the number of channels (R, G, B, A, etc).
5771  channelCount = stbi__get16be(s);
5772  if (channelCount < 0 || channelCount > 16)
5773  return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image");
5774 
5775  // Read the rows and columns of the image.
5776  h = stbi__get32be(s);
5777  w = stbi__get32be(s);
5778 
5779  // Make sure the depth is 8 bits.
5780  bitdepth = stbi__get16be(s);
5781  if (bitdepth != 8 && bitdepth != 16)
5782  return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit");
5783 
5784  // Make sure the color mode is RGB.
5785  // Valid options are:
5786  // 0: Bitmap
5787  // 1: Grayscale
5788  // 2: Indexed color
5789  // 3: RGB color
5790  // 4: CMYK color
5791  // 7: Multichannel
5792  // 8: Duotone
5793  // 9: Lab color
5794  if (stbi__get16be(s) != 3)
5795  return stbi__errpuc("wrong color format", "PSD is not in RGB color format");
5796 
5797  // Skip the Mode Data. (It's the palette for indexed color; other info for other modes.)
5798  stbi__skip(s,stbi__get32be(s) );
5799 
5800  // Skip the image resources. (resolution, pen tool paths, etc)
5801  stbi__skip(s, stbi__get32be(s) );
5802 
5803  // Skip the reserved data.
5804  stbi__skip(s, stbi__get32be(s) );
5805 
5806  // Find out if the data is compressed.
5807  // Known values:
5808  // 0: no compression
5809  // 1: RLE compressed
5810  compression = stbi__get16be(s);
5811  if (compression > 1)
5812  return stbi__errpuc("bad compression", "PSD has an unknown compression format");
5813 
5814  // Check size
5815  if (!stbi__mad3sizes_valid(4, w, h, 0))
5816  return stbi__errpuc("too large", "Corrupt PSD");
5817 
5818  // Create the destination image.
5819 
5820  if (!compression && bitdepth == 16 && bpc == 16) {
5821  out = (stbi_uc *) stbi__malloc_mad3(8, w, h, 0);
5822  ri->bits_per_channel = 16;
5823  } else
5824  out = (stbi_uc *) stbi__malloc(4 * w*h);
5825 
5826  if (!out) return stbi__errpuc("outofmem", "Out of memory");
5827  pixelCount = w*h;
5828 
5829  // Initialize the data to zero.
5830  //memset( out, 0, pixelCount * 4 );
5831 
5832  // Finally, the image data.
5833  if (compression) {
5834  // RLE as used by .PSD and .TIFF
5835  // Loop until you get the number of unpacked bytes you are expecting:
5836  // Read the next source byte into n.
5837  // If n is between 0 and 127 inclusive, copy the next n+1 bytes literally.
5838  // Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times.
5839  // Else if n is 128, noop.
5840  // Endloop
5841 
5842  // The RLE-compressed data is preceeded by a 2-byte data count for each row in the data,
5843  // which we're going to just skip.
5844  stbi__skip(s, h * channelCount * 2 );
5845 
5846  // Read the RLE data by channel.
5847  for (channel = 0; channel < 4; channel++) {
5848  stbi_uc *p;
5849 
5850  p = out+channel;
5851  if (channel >= channelCount) {
5852  // Fill this channel with default data.
5853  for (i = 0; i < pixelCount; i++, p += 4)
5854  *p = (channel == 3 ? 255 : 0);
5855  } else {
5856  // Read the RLE data.
5857  if (!stbi__psd_decode_rle(s, p, pixelCount)) {
5858  STBI_FREE(out);
5859  return stbi__errpuc("corrupt", "bad RLE data");
5860  }
5861  }
5862  }
5863 
5864  } else {
5865  // We're at the raw image data. It's each channel in order (Red, Green, Blue, Alpha, ...)
5866  // where each channel consists of an 8-bit (or 16-bit) value for each pixel in the image.
5867 
5868  // Read the data by channel.
5869  for (channel = 0; channel < 4; channel++) {
5870  if (channel >= channelCount) {
5871  // Fill this channel with default data.
5872  if (bitdepth == 16 && bpc == 16) {
5873  stbi__uint16 *q = ((stbi__uint16 *) out) + channel;
5874  stbi__uint16 val = channel == 3 ? 65535 : 0;
5875  for (i = 0; i < pixelCount; i++, q += 4)
5876  *q = val;
5877  } else {
5878  stbi_uc *p = out+channel;
5879  stbi_uc val = channel == 3 ? 255 : 0;
5880  for (i = 0; i < pixelCount; i++, p += 4)
5881  *p = val;
5882  }
5883  } else {
5884  if (ri->bits_per_channel == 16) { // output bpc
5885  stbi__uint16 *q = ((stbi__uint16 *) out) + channel;
5886  for (i = 0; i < pixelCount; i++, q += 4)
5887  *q = (stbi__uint16) stbi__get16be(s);
5888  } else {
5889  stbi_uc *p = out+channel;
5890  if (bitdepth == 16) { // input bpc
5891  for (i = 0; i < pixelCount; i++, p += 4)
5892  *p = (stbi_uc) (stbi__get16be(s) >> 8);
5893  } else {
5894  for (i = 0; i < pixelCount; i++, p += 4)
5895  *p = stbi__get8(s);
5896  }
5897  }
5898  }
5899  }
5900  }
5901 
5902  // remove weird white matte from PSD
5903  if (channelCount >= 4) {
5904  if (ri->bits_per_channel == 16) {
5905  for (i=0; i < w*h; ++i) {
5906  stbi__uint16 *pixel = (stbi__uint16 *) out + 4*i;
5907  if (pixel[3] != 0 && pixel[3] != 65535) {
5908  float a = pixel[3] / 65535.0f;
5909  float ra = 1.0f / a;
5910  float inv_a = 65535.0f * (1 - ra);
5911  pixel[0] = (stbi__uint16) (pixel[0]*ra + inv_a);
5912  pixel[1] = (stbi__uint16) (pixel[1]*ra + inv_a);
5913  pixel[2] = (stbi__uint16) (pixel[2]*ra + inv_a);
5914  }
5915  }
5916  } else {
5917  for (i=0; i < w*h; ++i) {
5918  unsigned char *pixel = out + 4*i;
5919  if (pixel[3] != 0 && pixel[3] != 255) {
5920  float a = pixel[3] / 255.0f;
5921  float ra = 1.0f / a;
5922  float inv_a = 255.0f * (1 - ra);
5923  pixel[0] = (unsigned char) (pixel[0]*ra + inv_a);
5924  pixel[1] = (unsigned char) (pixel[1]*ra + inv_a);
5925  pixel[2] = (unsigned char) (pixel[2]*ra + inv_a);
5926  }
5927  }
5928  }
5929  }
5930 
5931  // convert to desired output format
5932  if (req_comp && req_comp != 4) {
5933  if (ri->bits_per_channel == 16)
5934  out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, 4, req_comp, w, h);
5935  else
5936  out = stbi__convert_format(out, 4, req_comp, w, h);
5937  if (out == NULL) return out; // stbi__convert_format frees input on failure
5938  }
5939 
5940  if (comp) *comp = 4;
5941  *y = h;
5942  *x = w;
5943 
5944  return out;
5945 }
5946 #endif
5947 
5948 // *************************************************************************************************
5949 // Softimage PIC loader
5950 // by Tom Seddon
5951 //
5952 // See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format
5953 // See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/
5954 
5955 #ifndef STBI_NO_PIC
5956 static int stbi__pic_is4(stbi__context *s,const char *str)
5957 {
5958  int i;
5959  for (i=0; i<4; ++i)
5960  if (stbi__get8(s) != (stbi_uc)str[i])
5961  return 0;
5962 
5963  return 1;
5964 }
5965 
5966 static int stbi__pic_test_core(stbi__context *s)
5967 {
5968  int i;
5969 
5970  if (!stbi__pic_is4(s,"\x53\x80\xF6\x34"))
5971  return 0;
5972 
5973  for(i=0;i<84;++i)
5974  stbi__get8(s);
5975 
5976  if (!stbi__pic_is4(s,"PICT"))
5977  return 0;
5978 
5979  return 1;
5980 }
5981 
5982 typedef struct
5983 {
5984  stbi_uc size,type,channel;
5985 } stbi__pic_packet;
5986 
5987 static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest)
5988 {
5989  int mask=0x80, i;
5990 
5991  for (i=0; i<4; ++i, mask>>=1) {
5992  if (channel & mask) {
5993  if (stbi__at_eof(s)) return stbi__errpuc("bad file","PIC file too short");
5994  dest[i]=stbi__get8(s);
5995  }
5996  }
5997 
5998  return dest;
5999 }
6000 
6001 static void stbi__copyval(int channel,stbi_uc *dest,const stbi_uc *src)
6002 {
6003  int mask=0x80,i;
6004 
6005  for (i=0;i<4; ++i, mask>>=1)
6006  if (channel&mask)
6007  dest[i]=src[i];
6008 }
6009 
6010 static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *comp, stbi_uc *result)
6011 {
6012  int act_comp=0,num_packets=0,y,chained;
6013  stbi__pic_packet packets[10];
6014 
6015  // this will (should...) cater for even some bizarre stuff like having data
6016  // for the same channel in multiple packets.
6017  do {
6018  stbi__pic_packet *packet;
6019 
6020  if (num_packets==sizeof(packets)/sizeof(packets[0]))
6021  return stbi__errpuc("bad format","too many packets");
6022 
6023  packet = &packets[num_packets++];
6024 
6025  chained = stbi__get8(s);
6026  packet->size = stbi__get8(s);
6027  packet->type = stbi__get8(s);
6028  packet->channel = stbi__get8(s);
6029 
6030  act_comp |= packet->channel;
6031 
6032  if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (reading packets)");
6033  if (packet->size != 8) return stbi__errpuc("bad format","packet isn't 8bpp");
6034  } while (chained);
6035 
6036  *comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel?
6037 
6038  for(y=0; y<height; ++y) {
6039  int packet_idx;
6040 
6041  for(packet_idx=0; packet_idx < num_packets; ++packet_idx) {
6042  stbi__pic_packet *packet = &packets[packet_idx];
6043  stbi_uc *dest = result+y*width*4;
6044 
6045  switch (packet->type) {
6046  default:
6047  return stbi__errpuc("bad format","packet has bad compression type");
6048 
6049  case 0: {//uncompressed
6050  int x;
6051 
6052  for(x=0;x<width;++x, dest+=4)
6053  if (!stbi__readval(s,packet->channel,dest))
6054  return 0;
6055  break;
6056  }
6057 
6058  case 1://Pure RLE
6059  {
6060  int left=width, i;
6061 
6062  while (left>0) {
6063  stbi_uc count,value[4];
6064 
6065  count=stbi__get8(s);
6066  if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pure read count)");
6067 
6068  if (count > left)
6069  count = (stbi_uc) left;
6070 
6071  if (!stbi__readval(s,packet->channel,value)) return 0;
6072 
6073  for(i=0; i<count; ++i,dest+=4)
6074  stbi__copyval(packet->channel,dest,value);
6075  left -= count;
6076  }
6077  }
6078  break;
6079 
6080  case 2: {//Mixed RLE
6081  int left=width;
6082  while (left>0) {
6083  int count = stbi__get8(s), i;
6084  if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (mixed read count)");
6085 
6086  if (count >= 128) { // Repeated
6087  stbi_uc value[4];
6088 
6089  if (count==128)
6090  count = stbi__get16be(s);
6091  else
6092  count -= 127;
6093  if (count > left)
6094  return stbi__errpuc("bad file","scanline overrun");
6095 
6096  if (!stbi__readval(s,packet->channel,value))
6097  return 0;
6098 
6099  for(i=0;i<count;++i, dest += 4)
6100  stbi__copyval(packet->channel,dest,value);
6101  } else { // Raw
6102  ++count;
6103  if (count>left) return stbi__errpuc("bad file","scanline overrun");
6104 
6105  for(i=0;i<count;++i, dest+=4)
6106  if (!stbi__readval(s,packet->channel,dest))
6107  return 0;
6108  }
6109  left-=count;
6110  }
6111  break;
6112  }
6113  }
6114  }
6115  }
6116 
6117  return result;
6118 }
6119 
6120 static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp, stbi__result_info *ri)
6121 {
6122  stbi_uc *result;
6123  int i, x,y, internal_comp;
6124  STBI_NOTUSED(ri);
6125 
6126  if (!comp) comp = &internal_comp;
6127 
6128  for (i=0; i<92; ++i)
6129  stbi__get8(s);
6130 
6131  x = stbi__get16be(s);
6132  y = stbi__get16be(s);
6133  if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pic header)");
6134  if (!stbi__mad3sizes_valid(x, y, 4, 0)) return stbi__errpuc("too large", "PIC image too large to decode");
6135 
6136  stbi__get32be(s); //skip `ratio'
6137  stbi__get16be(s); //skip `fields'
6138  stbi__get16be(s); //skip `pad'
6139 
6140  // intermediate buffer is RGBA
6141  result = (stbi_uc *) stbi__malloc_mad3(x, y, 4, 0);
6142  memset(result, 0xff, x*y*4);
6143 
6144  if (!stbi__pic_load_core(s,x,y,comp, result)) {
6145  STBI_FREE(result);
6146  result=0;
6147  }
6148  *px = x;
6149  *py = y;
6150  if (req_comp == 0) req_comp = *comp;
6151  result=stbi__convert_format(result,4,req_comp,x,y);
6152 
6153  return result;
6154 }
6155 
6156 static int stbi__pic_test(stbi__context *s)
6157 {
6158  int r = stbi__pic_test_core(s);
6159  stbi__rewind(s);
6160  return r;
6161 }
6162 #endif
6163 
6164 // *************************************************************************************************
6165 // GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb
6166 
6167 #ifndef STBI_NO_GIF
6168 typedef struct
6169 {
6170  stbi__int16 prefix;
6171  stbi_uc first;
6172  stbi_uc suffix;
6173 } stbi__gif_lzw;
6174 
6175 typedef struct
6176 {
6177  int w,h;
6178  stbi_uc *out; // output buffer (always 4 components)
6179  stbi_uc *background; // The current "background" as far as a gif is concerned
6180  stbi_uc *history;
6181  int flags, bgindex, ratio, transparent, eflags;
6182  stbi_uc pal[256][4];
6183  stbi_uc lpal[256][4];
6184  stbi__gif_lzw codes[8192];
6185  stbi_uc *color_table;
6186  int parse, step;
6187  int lflags;
6188  int start_x, start_y;
6189  int max_x, max_y;
6190  int cur_x, cur_y;
6191  int line_size;
6192  int delay;
6193 } stbi__gif;
6194 
6195 static int stbi__gif_test_raw(stbi__context *s)
6196 {
6197  int sz;
6198  if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') return 0;
6199  sz = stbi__get8(s);
6200  if (sz != '9' && sz != '7') return 0;
6201  if (stbi__get8(s) != 'a') return 0;
6202  return 1;
6203 }
6204 
6205 static int stbi__gif_test(stbi__context *s)
6206 {
6207  int r = stbi__gif_test_raw(s);
6208  stbi__rewind(s);
6209  return r;
6210 }
6211 
6212 static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4], int num_entries, int transp)
6213 {
6214  int i;
6215  for (i=0; i < num_entries; ++i) {
6216  pal[i][2] = stbi__get8(s);
6217  pal[i][1] = stbi__get8(s);
6218  pal[i][0] = stbi__get8(s);
6219  pal[i][3] = transp == i ? 0 : 255;
6220  }
6221 }
6222 
6223 static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_info)
6224 {
6225  stbi_uc version;
6226  if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8')
6227  return stbi__err("not GIF", "Corrupt GIF");
6228 
6229  version = stbi__get8(s);
6230  if (version != '7' && version != '9') return stbi__err("not GIF", "Corrupt GIF");
6231  if (stbi__get8(s) != 'a') return stbi__err("not GIF", "Corrupt GIF");
6232 
6233  stbi__g_failure_reason = "";
6234  g->w = stbi__get16le(s);
6235  g->h = stbi__get16le(s);
6236  g->flags = stbi__get8(s);
6237  g->bgindex = stbi__get8(s);
6238  g->ratio = stbi__get8(s);
6239  g->transparent = -1;
6240 
6241  if (comp != 0) *comp = 4; // can't actually tell whether it's 3 or 4 until we parse the comments
6242 
6243  if (is_info) return 1;
6244 
6245  if (g->flags & 0x80)
6246  stbi__gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1);
6247 
6248  return 1;
6249 }
6250 
6251 static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp)
6252 {
6253  stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif));
6254  if (!stbi__gif_header(s, g, comp, 1)) {
6255  STBI_FREE(g);
6256  stbi__rewind( s );
6257  return 0;
6258  }
6259  if (x) *x = g->w;
6260  if (y) *y = g->h;
6261  STBI_FREE(g);
6262  return 1;
6263 }
6264 
6265 static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code)
6266 {
6267  stbi_uc *p, *c;
6268  int idx;
6269 
6270  // recurse to decode the prefixes, since the linked-list is backwards,
6271  // and working backwards through an interleaved image would be nasty
6272  if (g->codes[code].prefix >= 0)
6273  stbi__out_gif_code(g, g->codes[code].prefix);
6274 
6275  if (g->cur_y >= g->max_y) return;
6276 
6277  idx = g->cur_x + g->cur_y;
6278  p = &g->out[idx];
6279  g->history[idx / 4] = 1;
6280 
6281  c = &g->color_table[g->codes[code].suffix * 4];
6282  if (c[3] > 128) { // don't render transparent pixels;
6283  p[0] = c[2];
6284  p[1] = c[1];
6285  p[2] = c[0];
6286  p[3] = c[3];
6287  }
6288  g->cur_x += 4;
6289 
6290  if (g->cur_x >= g->max_x) {
6291  g->cur_x = g->start_x;
6292  g->cur_y += g->step;
6293 
6294  while (g->cur_y >= g->max_y && g->parse > 0) {
6295  g->step = (1 << g->parse) * g->line_size;
6296  g->cur_y = g->start_y + (g->step >> 1);
6297  --g->parse;
6298  }
6299  }
6300 }
6301 
6302 static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g)
6303 {
6304  stbi_uc lzw_cs;
6305  stbi__int32 len, init_code;
6306  stbi__uint32 first;
6307  stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear;
6308  stbi__gif_lzw *p;
6309 
6310  lzw_cs = stbi__get8(s);
6311  if (lzw_cs > 12) return NULL;
6312  clear = 1 << lzw_cs;
6313  first = 1;
6314  codesize = lzw_cs + 1;
6315  codemask = (1 << codesize) - 1;
6316  bits = 0;
6317  valid_bits = 0;
6318  for (init_code = 0; init_code < clear; init_code++) {
6319  g->codes[init_code].prefix = -1;
6320  g->codes[init_code].first = (stbi_uc) init_code;
6321  g->codes[init_code].suffix = (stbi_uc) init_code;
6322  }
6323 
6324  // support no starting clear code
6325  avail = clear+2;
6326  oldcode = -1;
6327 
6328  len = 0;
6329  for(;;) {
6330  if (valid_bits < codesize) {
6331  if (len == 0) {
6332  len = stbi__get8(s); // start new block
6333  if (len == 0)
6334  return g->out;
6335  }
6336  --len;
6337  bits |= (stbi__int32) stbi__get8(s) << valid_bits;
6338  valid_bits += 8;
6339  } else {
6340  stbi__int32 code = bits & codemask;
6341  bits >>= codesize;
6342  valid_bits -= codesize;
6343  // @OPTIMIZE: is there some way we can accelerate the non-clear path?
6344  if (code == clear) { // clear code
6345  codesize = lzw_cs + 1;
6346  codemask = (1 << codesize) - 1;
6347  avail = clear + 2;
6348  oldcode = -1;
6349  first = 0;
6350  } else if (code == clear + 1) { // end of stream code
6351  stbi__skip(s, len);
6352  while ((len = stbi__get8(s)) > 0)
6353  stbi__skip(s,len);
6354  return g->out;
6355  } else if (code <= avail) {
6356  if (first) {
6357  return stbi__errpuc("no clear code", "Corrupt GIF");
6358  }
6359 
6360  if (oldcode >= 0) {
6361  p = &g->codes[avail++];
6362  if (avail > 8192) {
6363  return stbi__errpuc("too many codes", "Corrupt GIF");
6364  }
6365 
6366  p->prefix = (stbi__int16) oldcode;
6367  p->first = g->codes[oldcode].first;
6368  p->suffix = (code == avail) ? p->first : g->codes[code].first;
6369  } else if (code == avail)
6370  return stbi__errpuc("illegal code in raster", "Corrupt GIF");
6371 
6372  stbi__out_gif_code(g, (stbi__uint16) code);
6373 
6374  if ((avail & codemask) == 0 && avail <= 0x0FFF) {
6375  codesize++;
6376  codemask = (1 << codesize) - 1;
6377  }
6378 
6379  oldcode = code;
6380  } else {
6381  return stbi__errpuc("illegal code in raster", "Corrupt GIF");
6382  }
6383  }
6384  }
6385 }
6386 
6387 // this function is designed to support animated gifs, although stb_image doesn't support it
6388 // two back is the image from two frames ago, used for a very specific disposal format
6389 static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp, stbi_uc *two_back)
6390 {
6391  int dispose;
6392  int first_frame;
6393  int pi;
6394  int pcount;
6395 
6396  // on first frame, any non-written pixels get the background colour (non-transparent)
6397  first_frame = 0;
6398  if (g->out == 0) {
6399  if (!stbi__gif_header(s, g, comp,0)) return 0; // stbi__g_failure_reason set by stbi__gif_header
6400  g->out = (stbi_uc *) stbi__malloc(4 * g->w * g->h);
6401  g->background = (stbi_uc *) stbi__malloc(4 * g->w * g->h);
6402  g->history = (stbi_uc *) stbi__malloc(g->w * g->h);
6403  if (g->out == 0) return stbi__errpuc("outofmem", "Out of memory");
6404 
6405  // image is treated as "tranparent" at the start - ie, nothing overwrites the current background;
6406  // background colour is only used for pixels that are not rendered first frame, after that "background"
6407  // color refers to teh color that was there the previous frame.
6408  memset( g->out, 0x00, 4 * g->w * g->h );
6409  memset( g->background, 0x00, 4 * g->w * g->h ); // state of the background (starts transparent)
6410  memset( g->history, 0x00, g->w * g->h ); // pixels that were affected previous frame
6411  first_frame = 1;
6412  } else {
6413  // second frame - how do we dispoase of the previous one?
6414  dispose = (g->eflags & 0x1C) >> 2;
6415  pcount = g->w * g->h;
6416 
6417  if ((dispose == 3) && (two_back == 0)) {
6418  dispose = 2; // if I don't have an image to revert back to, default to the old background
6419  }
6420 
6421  if (dispose == 3) { // use previous graphic
6422  for (pi = 0; pi < pcount; ++pi) {
6423  if (g->history[pi]) {
6424  memcpy( &g->out[pi * 4], &two_back[pi * 4], 4 );
6425  }
6426  }
6427  } else if (dispose == 2) {
6428  // restore what was changed last frame to background before that frame;
6429  for (pi = 0; pi < pcount; ++pi) {
6430  if (g->history[pi]) {
6431  memcpy( &g->out[pi * 4], &g->background[pi * 4], 4 );
6432  }
6433  }
6434  } else {
6435  // This is a non-disposal case eithe way, so just
6436  // leave the pixels as is, and they will become the new background
6437  // 1: do not dispose
6438  // 0: not specified.
6439  }
6440 
6441  // background is what out is after the undoing of the previou frame;
6442  memcpy( g->background, g->out, 4 * g->w * g->h );
6443  }
6444 
6445  // clear my history;
6446  memset( g->history, 0x00, g->w * g->h ); // pixels that were affected previous frame
6447 
6448  for (;;) {
6449  int tag = stbi__get8(s);
6450  switch (tag) {
6451  case 0x2C: /* Image Descriptor */
6452  {
6453  stbi__int32 x, y, w, h;
6454  stbi_uc *o;
6455 
6456  x = stbi__get16le(s);
6457  y = stbi__get16le(s);
6458  w = stbi__get16le(s);
6459  h = stbi__get16le(s);
6460  if (((x + w) > (g->w)) || ((y + h) > (g->h)))
6461  return stbi__errpuc("bad Image Descriptor", "Corrupt GIF");
6462 
6463  g->line_size = g->w * 4;
6464  g->start_x = x * 4;
6465  g->start_y = y * g->line_size;
6466  g->max_x = g->start_x + w * 4;
6467  g->max_y = g->start_y + h * g->line_size;
6468  g->cur_x = g->start_x;
6469  g->cur_y = g->start_y;
6470 
6471  g->lflags = stbi__get8(s);
6472 
6473  if (g->lflags & 0x40) {
6474  g->step = 8 * g->line_size; // first interlaced spacing
6475  g->parse = 3;
6476  } else {
6477  g->step = g->line_size;
6478  g->parse = 0;
6479  }
6480 
6481  if (g->lflags & 0x80) {
6482  stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1);
6483  g->color_table = (stbi_uc *) g->lpal;
6484  } else if (g->flags & 0x80) {
6485  g->color_table = (stbi_uc *) g->pal;
6486  } else
6487  return stbi__errpuc("missing color table", "Corrupt GIF");
6488 
6489  o = stbi__process_gif_raster(s, g);
6490  if (o == NULL) return NULL;
6491 
6492  // if this was the first frame,
6493  pcount = g->w * g->h;
6494  if (first_frame && (g->bgindex > 0)) {
6495  // if first frame, any pixel not drawn to gets the background color
6496  for (pi = 0; pi < pcount; ++pi) {
6497  if (g->history[pi] == 0) {
6498  g->pal[g->bgindex][3] = 255; // just in case it was made transparent, undo that; It will be reset next frame if need be;
6499  memcpy( &g->out[pi * 4], &g->pal[g->bgindex], 4 );
6500  }
6501  }
6502  }
6503 
6504  return o;
6505  }
6506 
6507  case 0x21: // Comment Extension.
6508  {
6509  int len;
6510  int ext = stbi__get8(s);
6511  if (ext == 0xF9) { // Graphic Control Extension.
6512  len = stbi__get8(s);
6513  if (len == 4) {
6514  g->eflags = stbi__get8(s);
6515  g->delay = 10 * stbi__get16le(s); // delay - 1/100th of a second, saving as 1/1000ths.
6516 
6517  // unset old transparent
6518  if (g->transparent >= 0) {
6519  g->pal[g->transparent][3] = 255;
6520  }
6521  if (g->eflags & 0x01) {
6522  g->transparent = stbi__get8(s);
6523  if (g->transparent >= 0) {
6524  g->pal[g->transparent][3] = 0;
6525  }
6526  } else {
6527  // don't need transparent
6528  stbi__skip(s, 1);
6529  g->transparent = -1;
6530  }
6531  } else {
6532  stbi__skip(s, len);
6533  break;
6534  }
6535  }
6536  while ((len = stbi__get8(s)) != 0) {
6537  stbi__skip(s, len);
6538  }
6539  break;
6540  }
6541 
6542  case 0x3B: // gif stream termination code
6543  return (stbi_uc *) s; // using '1' causes warning on some compilers
6544 
6545  default:
6546  return stbi__errpuc("unknown code", "Corrupt GIF");
6547  }
6548  }
6549 }
6550 
6551 static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp)
6552 {
6553  if (stbi__gif_test(s)) {
6554  int layers = 0;
6555  stbi_uc *u = 0;
6556  stbi_uc *out = 0;
6557  stbi_uc *two_back = 0;
6558  stbi__gif g;
6559  int stride;
6560  memset(&g, 0, sizeof(g));
6561  if (delays) {
6562  *delays = 0;
6563  }
6564 
6565  do {
6566  u = stbi__gif_load_next(s, &g, comp, req_comp, two_back);
6567  if (u == (stbi_uc *) s) u = 0; // end of animated gif marker
6568 
6569  if (u) {
6570  *x = g.w;
6571  *y = g.h;
6572  ++layers;
6573  stride = g.w * g.h * 4;
6574 
6575  if (out) {
6576  out = (stbi_uc*) STBI_REALLOC( out, layers * stride );
6577  if (delays) {
6578  *delays = (int*) STBI_REALLOC( *delays, sizeof(int) * layers );
6579  }
6580  } else {
6581  out = (stbi_uc*)stbi__malloc( layers * stride );
6582  if (delays) {
6583  *delays = (int*) stbi__malloc( layers * sizeof(int) );
6584  }
6585  }
6586  memcpy( out + ((layers - 1) * stride), u, stride );
6587  if (layers >= 2) {
6588  two_back = out - 2 * stride;
6589  }
6590 
6591  if (delays) {
6592  (*delays)[layers - 1U] = g.delay;
6593  }
6594  }
6595  } while (u != 0);
6596 
6597  // free temp buffer;
6598  STBI_FREE(g.out);
6599  STBI_FREE(g.history);
6600  STBI_FREE(g.background);
6601 
6602  // do the final conversion after loading everything;
6603  if (req_comp && req_comp != 4)
6604  out = stbi__convert_format(out, 4, req_comp, layers * g.w, g.h);
6605 
6606  *z = layers;
6607  return out;
6608  } else {
6609  return stbi__errpuc("not GIF", "Image was not as a gif type.");
6610  }
6611 }
6612 
6613 static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
6614 {
6615  stbi_uc *u = 0;
6616  stbi__gif g;
6617  memset(&g, 0, sizeof(g));
6618 
6619  u = stbi__gif_load_next(s, &g, comp, req_comp, 0);
6620  if (u == (stbi_uc *) s) u = 0; // end of animated gif marker
6621  if (u) {
6622  *x = g.w;
6623  *y = g.h;
6624 
6625  // moved conversion to after successful load so that the same
6626  // can be done for multiple frames.
6627  if (req_comp && req_comp != 4)
6628  u = stbi__convert_format(u, 4, req_comp, g.w, g.h);
6629  }
6630 
6631  // free buffers needed for multiple frame loading;
6632  STBI_FREE(g.history);
6633  STBI_FREE(g.background);
6634 
6635  return u;
6636 }
6637 
6638 static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp)
6639 {
6640  return stbi__gif_info_raw(s,x,y,comp);
6641 }
6642 #endif
6643 
6644 // *************************************************************************************************
6645 // Radiance RGBE HDR loader
6646 // originally by Nicolas Schulz
6647 #ifndef STBI_NO_HDR
6648 static int stbi__hdr_test_core(stbi__context *s, const char *signature)
6649 {
6650  int i;
6651  for (i=0; signature[i]; ++i)
6652  if (stbi__get8(s) != signature[i])
6653  return 0;
6654  stbi__rewind(s);
6655  return 1;
6656 }
6657 
6658 static int stbi__hdr_test(stbi__context* s)
6659 {
6660  int r = stbi__hdr_test_core(s, "#?RADIANCE\n");
6661  stbi__rewind(s);
6662  if(!r) {
6663  r = stbi__hdr_test_core(s, "#?RGBE\n");
6664  stbi__rewind(s);
6665  }
6666  return r;
6667 }
6668 
6669 #define STBI__HDR_BUFLEN 1024
6670 static char *stbi__hdr_gettoken(stbi__context *z, char *buffer)
6671 {
6672  int len=0;
6673  char c = '\0';
6674 
6675  c = (char) stbi__get8(z);
6676 
6677  while (!stbi__at_eof(z) && c != '\n') {
6678  buffer[len++] = c;
6679  if (len == STBI__HDR_BUFLEN-1) {
6680  // flush to end of line
6681  while (!stbi__at_eof(z) && stbi__get8(z) != '\n')
6682  ;
6683  break;
6684  }
6685  c = (char) stbi__get8(z);
6686  }
6687 
6688  buffer[len] = 0;
6689  return buffer;
6690 }
6691 
6692 static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp)
6693 {
6694  if ( input[3] != 0 ) {
6695  float f1;
6696  // Exponent
6697  f1 = (float) ldexp(1.0f, input[3] - (int)(128 + 8));
6698  if (req_comp <= 2)
6699  output[0] = (input[0] + input[1] + input[2]) * f1 / 3;
6700  else {
6701  output[0] = input[0] * f1;
6702  output[1] = input[1] * f1;
6703  output[2] = input[2] * f1;
6704  }
6705  if (req_comp == 2) output[1] = 1;
6706  if (req_comp == 4) output[3] = 1;
6707  } else {
6708  switch (req_comp) {
6709  case 4: output[3] = 1; /* fallthrough */
6710  case 3: output[0] = output[1] = output[2] = 0;
6711  break;
6712  case 2: output[1] = 1; /* fallthrough */
6713  case 1: output[0] = 0;
6714  break;
6715  }
6716  }
6717 }
6718 
6719 static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
6720 {
6721  char buffer[STBI__HDR_BUFLEN];
6722  char *token;
6723  int valid = 0;
6724  int width, height;
6725  stbi_uc *scanline;
6726  float *hdr_data;
6727  int len;
6728  unsigned char count, value;
6729  int i, j, k, c1,c2, z;
6730  const char *headerToken;
6731  STBI_NOTUSED(ri);
6732 
6733  // Check identifier
6734  headerToken = stbi__hdr_gettoken(s,buffer);
6735  if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0)
6736  return stbi__errpf("not HDR", "Corrupt HDR image");
6737 
6738  // Parse header
6739  for(;;) {
6740  token = stbi__hdr_gettoken(s,buffer);
6741  if (token[0] == 0) break;
6742  if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
6743  }
6744 
6745  if (!valid) return stbi__errpf("unsupported format", "Unsupported HDR format");
6746 
6747  // Parse width and height
6748  // can't use sscanf() if we're not using stdio!
6749  token = stbi__hdr_gettoken(s,buffer);
6750  if (strncmp(token, "-Y ", 3)) return stbi__errpf("unsupported data layout", "Unsupported HDR format");
6751  token += 3;
6752  height = (int) strtol(token, &token, 10);
6753  while (*token == ' ') ++token;
6754  if (strncmp(token, "+X ", 3)) return stbi__errpf("unsupported data layout", "Unsupported HDR format");
6755  token += 3;
6756  width = (int) strtol(token, NULL, 10);
6757 
6758  *x = width;
6759  *y = height;
6760 
6761  if (comp) *comp = 3;
6762  if (req_comp == 0) req_comp = 3;
6763 
6764  if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0))
6765  return stbi__errpf("too large", "HDR image is too large");
6766 
6767  // Read data
6768  hdr_data = (float *) stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0);
6769  if (!hdr_data)
6770  return stbi__errpf("outofmem", "Out of memory");
6771 
6772  // Load image data
6773  // image data is stored as some number of sca
6774  if ( width < 8 || width >= 32768) {
6775  // Read flat data
6776  for (j=0; j < height; ++j) {
6777  for (i=0; i < width; ++i) {
6778  stbi_uc rgbe[4];
6779  main_decode_loop:
6780  stbi__getn(s, rgbe, 4);
6781  stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp);
6782  }
6783  }
6784  } else {
6785  // Read RLE-encoded data
6786  scanline = NULL;
6787 
6788  for (j = 0; j < height; ++j) {
6789  c1 = stbi__get8(s);
6790  c2 = stbi__get8(s);
6791  len = stbi__get8(s);
6792  if (c1 != 2 || c2 != 2 || (len & 0x80)) {
6793  // not run-length encoded, so we have to actually use THIS data as a decoded
6794  // pixel (note this can't be a valid pixel--one of RGB must be >= 128)
6795  stbi_uc rgbe[4];
6796  rgbe[0] = (stbi_uc) c1;
6797  rgbe[1] = (stbi_uc) c2;
6798  rgbe[2] = (stbi_uc) len;
6799  rgbe[3] = (stbi_uc) stbi__get8(s);
6800  stbi__hdr_convert(hdr_data, rgbe, req_comp);
6801  i = 1;
6802  j = 0;
6803  STBI_FREE(scanline);
6804  goto main_decode_loop; // yes, this makes no sense
6805  }
6806  len <<= 8;
6807  len |= stbi__get8(s);
6808  if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); }
6809  if (scanline == NULL) {
6810  scanline = (stbi_uc *) stbi__malloc_mad2(width, 4, 0);
6811  if (!scanline) {
6812  STBI_FREE(hdr_data);
6813  return stbi__errpf("outofmem", "Out of memory");
6814  }
6815  }
6816 
6817  for (k = 0; k < 4; ++k) {
6818  int nleft;
6819  i = 0;
6820  while ((nleft = width - i) > 0) {
6821  count = stbi__get8(s);
6822  if (count > 128) {
6823  // Run
6824  value = stbi__get8(s);
6825  count -= 128;
6826  if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
6827  for (z = 0; z < count; ++z)
6828  scanline[i++ * 4 + k] = value;
6829  } else {
6830  // Dump
6831  if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
6832  for (z = 0; z < count; ++z)
6833  scanline[i++ * 4 + k] = stbi__get8(s);
6834  }
6835  }
6836  }
6837  for (i=0; i < width; ++i)
6838  stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp);
6839  }
6840  if (scanline)
6841  STBI_FREE(scanline);
6842  }
6843 
6844  return hdr_data;
6845 }
6846 
6847 static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp)
6848 {
6849  char buffer[STBI__HDR_BUFLEN];
6850  char *token;
6851  int valid = 0;
6852  int dummy;
6853 
6854  if (!x) x = &dummy;
6855  if (!y) y = &dummy;
6856  if (!comp) comp = &dummy;
6857 
6858  if (stbi__hdr_test(s) == 0) {
6859  stbi__rewind( s );
6860  return 0;
6861  }
6862 
6863  for(;;) {
6864  token = stbi__hdr_gettoken(s,buffer);
6865  if (token[0] == 0) break;
6866  if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
6867  }
6868 
6869  if (!valid) {
6870  stbi__rewind( s );
6871  return 0;
6872  }
6873  token = stbi__hdr_gettoken(s,buffer);
6874  if (strncmp(token, "-Y ", 3)) {
6875  stbi__rewind( s );
6876  return 0;
6877  }
6878  token += 3;
6879  *y = (int) strtol(token, &token, 10);
6880  while (*token == ' ') ++token;
6881  if (strncmp(token, "+X ", 3)) {
6882  stbi__rewind( s );
6883  return 0;
6884  }
6885  token += 3;
6886  *x = (int) strtol(token, NULL, 10);
6887  *comp = 3;
6888  return 1;
6889 }
6890 #endif // STBI_NO_HDR
6891 
6892 #ifndef STBI_NO_BMP
6893 static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp)
6894 {
6895  void *p;
6896  stbi__bmp_data info;
6897 
6898  info.all_a = 255;
6899  p = stbi__bmp_parse_header(s, &info);
6900  stbi__rewind( s );
6901  if (p == NULL)
6902  return 0;
6903  if (x) *x = s->img_x;
6904  if (y) *y = s->img_y;
6905  if (comp) *comp = info.ma ? 4 : 3;
6906  return 1;
6907 }
6908 #endif
6909 
6910 #ifndef STBI_NO_PSD
6911 static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp)
6912 {
6913  int channelCount, dummy, depth;
6914  if (!x) x = &dummy;
6915  if (!y) y = &dummy;
6916  if (!comp) comp = &dummy;
6917  if (stbi__get32be(s) != 0x38425053) {
6918  stbi__rewind( s );
6919  return 0;
6920  }
6921  if (stbi__get16be(s) != 1) {
6922  stbi__rewind( s );
6923  return 0;
6924  }
6925  stbi__skip(s, 6);
6926  channelCount = stbi__get16be(s);
6927  if (channelCount < 0 || channelCount > 16) {
6928  stbi__rewind( s );
6929  return 0;
6930  }
6931  *y = stbi__get32be(s);
6932  *x = stbi__get32be(s);
6933  depth = stbi__get16be(s);
6934  if (depth != 8 && depth != 16) {
6935  stbi__rewind( s );
6936  return 0;
6937  }
6938  if (stbi__get16be(s) != 3) {
6939  stbi__rewind( s );
6940  return 0;
6941  }
6942  *comp = 4;
6943  return 1;
6944 }
6945 
6946 static int stbi__psd_is16(stbi__context *s)
6947 {
6948  int channelCount, depth;
6949  if (stbi__get32be(s) != 0x38425053) {
6950  stbi__rewind( s );
6951  return 0;
6952  }
6953  if (stbi__get16be(s) != 1) {
6954  stbi__rewind( s );
6955  return 0;
6956  }
6957  stbi__skip(s, 6);
6958  channelCount = stbi__get16be(s);
6959  if (channelCount < 0 || channelCount > 16) {
6960  stbi__rewind( s );
6961  return 0;
6962  }
6963  (void) stbi__get32be(s);
6964  (void) stbi__get32be(s);
6965  depth = stbi__get16be(s);
6966  if (depth != 16) {
6967  stbi__rewind( s );
6968  return 0;
6969  }
6970  return 1;
6971 }
6972 #endif
6973 
6974 #ifndef STBI_NO_PIC
6975 static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp)
6976 {
6977  int act_comp=0,num_packets=0,chained,dummy;
6978  stbi__pic_packet packets[10];
6979 
6980  if (!x) x = &dummy;
6981  if (!y) y = &dummy;
6982  if (!comp) comp = &dummy;
6983 
6984  if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) {
6985  stbi__rewind(s);
6986  return 0;
6987  }
6988 
6989  stbi__skip(s, 88);
6990 
6991  *x = stbi__get16be(s);
6992  *y = stbi__get16be(s);
6993  if (stbi__at_eof(s)) {
6994  stbi__rewind( s);
6995  return 0;
6996  }
6997  if ( (*x) != 0 && (1 << 28) / (*x) < (*y)) {
6998  stbi__rewind( s );
6999  return 0;
7000  }
7001 
7002  stbi__skip(s, 8);
7003 
7004  do {
7005  stbi__pic_packet *packet;
7006 
7007  if (num_packets==sizeof(packets)/sizeof(packets[0]))
7008  return 0;
7009 
7010  packet = &packets[num_packets++];
7011  chained = stbi__get8(s);
7012  packet->size = stbi__get8(s);
7013  packet->type = stbi__get8(s);
7014  packet->channel = stbi__get8(s);
7015  act_comp |= packet->channel;
7016 
7017  if (stbi__at_eof(s)) {
7018  stbi__rewind( s );
7019  return 0;
7020  }
7021  if (packet->size != 8) {
7022  stbi__rewind( s );
7023  return 0;
7024  }
7025  } while (chained);
7026 
7027  *comp = (act_comp & 0x10 ? 4 : 3);
7028 
7029  return 1;
7030 }
7031 #endif
7032 
7033 // *************************************************************************************************
7034 // Portable Gray Map and Portable Pixel Map loader
7035 // by Ken Miller
7036 //
7037 // PGM: http://netpbm.sourceforge.net/doc/pgm.html
7038 // PPM: http://netpbm.sourceforge.net/doc/ppm.html
7039 //
7040 // Known limitations:
7041 // Does not support comments in the header section
7042 // Does not support ASCII image data (formats P2 and P3)
7043 // Does not support 16-bit-per-channel
7044 
7045 #ifndef STBI_NO_PNM
7046 
7047 static int stbi__pnm_test(stbi__context *s)
7048 {
7049  char p, t;
7050  p = (char) stbi__get8(s);
7051  t = (char) stbi__get8(s);
7052  if (p != 'P' || (t != '5' && t != '6')) {
7053  stbi__rewind( s );
7054  return 0;
7055  }
7056  return 1;
7057 }
7058 
7059 static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
7060 {
7061  stbi_uc *out;
7062  STBI_NOTUSED(ri);
7063 
7064  if (!stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n))
7065  return 0;
7066 
7067  *x = s->img_x;
7068  *y = s->img_y;
7069  if (comp) *comp = s->img_n;
7070 
7071  if (!stbi__mad3sizes_valid(s->img_n, s->img_x, s->img_y, 0))
7072  return stbi__errpuc("too large", "PNM too large");
7073