Subversion Repositories gelsvn

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
369 jab 1
/* stbi-1.16 - public domain JPEG/PNG reader - http://nothings.org/stb_image.c
2
                      when you control the images you're loading
3
 
4
   QUICK NOTES:
5
      Primarily of interest to game developers and other people who can
6
          avoid problematic images and only need the trivial interface
7
 
8
      JPEG baseline (no JPEG progressive, no oddball channel decimations)
9
      PNG non-interlaced
10
      BMP non-1bpp, non-RLE
11
      TGA (not sure what subset, if a subset)
12
      PSD (composited view only, no extra channels)
13
      HDR (radiance rgbE format)
14
      writes BMP,TGA (define STBI_NO_WRITE to remove code)
15
      decoded from memory or through stdio FILE (define STBI_NO_STDIO to remove code)
16
      supports installable dequantizing-IDCT, YCbCr-to-RGB conversion (define STBI_SIMD)
17
 
18
   TODO:
19
      stbi_info_*
20
 
21
   history:
22
      1.16   major bugfix - convert_format converted one too many pixels
23
      1.15   initialize some fields for thread safety
24
      1.14   fix threadsafe conversion bug; header-file-only version (#define STBI_HEADER_FILE_ONLY before including)
25
      1.13   threadsafe
26
      1.12   const qualifiers in the API
27
      1.11   Support installable IDCT, colorspace conversion routines
28
      1.10   Fixes for 64-bit (don't use "unsigned long")
29
             optimized upsampling by Fabian "ryg" Giesen
30
      1.09   Fix format-conversion for PSD code (bad global variables!)
31
      1.08   Thatcher Ulrich's PSD code integrated by Nicolas Schulz
32
      1.07   attempt to fix C++ warning/errors again
33
      1.06   attempt to fix C++ warning/errors again
34
      1.05   fix TGA loading to return correct *comp and use good luminance calc
35
      1.04   default float alpha is 1, not 255; use 'void *' for stbi_image_free
36
      1.03   bugfixes to STBI_NO_STDIO, STBI_NO_HDR
37
      1.02   support for (subset of) HDR files, float interface for preferred access to them
38
      1.01   fix bug: possible bug in handling right-side up bmps... not sure
39
             fix bug: the stbi_bmp_load() and stbi_tga_load() functions didn't work at all
40
      1.00   interface to zlib that skips zlib header
41
      0.99   correct handling of alpha in palette
42
      0.98   TGA loader by lonesock; dynamically add loaders (untested)
43
      0.97   jpeg errors on too large a file; also catch another malloc failure
44
      0.96   fix detection of invalid v value - particleman@mollyrocket forum
45
      0.95   during header scan, seek to markers in case of padding
46
      0.94   STBI_NO_STDIO to disable stdio usage; rename all #defines the same
47
      0.93   handle jpegtran output; verbose errors
48
      0.92   read 4,8,16,24,32-bit BMP files of several formats
49
      0.91   output 24-bit Windows 3.0 BMP files
50
      0.90   fix a few more warnings; bump version number to approach 1.0
51
      0.61   bugfixes due to Marc LeBlanc, Christopher Lloyd
52
      0.60   fix compiling as c++
53
      0.59   fix warnings: merge Dave Moore's -Wall fixes
54
      0.58   fix bug: zlib uncompressed mode len/nlen was wrong endian
55
      0.57   fix bug: jpg last huffman symbol before marker was >9 bits but less
56
                      than 16 available
57
      0.56   fix bug: zlib uncompressed mode len vs. nlen
58
      0.55   fix bug: restart_interval not initialized to 0
59
      0.54   allow NULL for 'int *comp'
60
      0.53   fix bug in png 3->4; speedup png decoding
61
      0.52   png handles req_comp=3,4 directly; minor cleanup; jpeg comments
62
      0.51   obey req_comp requests, 1-component jpegs return as 1-component,
63
             on 'test' only check type, not whether we support this variant
64
*/
65
 
66
#include "stb_image_aug.h"
67
 
68
#ifndef STBI_NO_HDR
69
#include <math.h>  // ldexp
70
#include <string.h> // strcmp
71
#endif
72
 
73
#ifndef STBI_NO_STDIO
74
#include <stdio.h>
75
#endif
76
#include <stdlib.h>
77
#include <memory.h>
78
#include <assert.h>
79
#include <stdarg.h>
80
 
81
// __forceinline is only a keyword when _MSC_VER is defined. Everywhere else
// it is mapped to plain 'inline' for C++ builds and to nothing for C builds.
#if !defined(_MSC_VER)
  #if defined(__cplusplus)
    #define __forceinline inline
  #else
    #define __forceinline
  #endif
#endif
88
 
89
 
90
// implementation:
91
// short integer aliases used throughout the implementation
typedef unsigned char  uint8;
typedef unsigned short uint16;
typedef   signed short  int16;
typedef unsigned int   uint32;
typedef   signed int    int32;
typedef unsigned int   uint;

// Compile-time size check: the array length becomes -1, which every compiler
// rejects, unless uint32 is exactly 4 bytes. (A length of 0 is silently
// accepted by compilers that support zero-length arrays as an extension.)
typedef unsigned char validate_uint32[sizeof(uint32)==4 ? 1 : -1];
100
 
101
// Disabling stdio also disables the image writers (presumably because the
// writers go through stdio -- confirm against the write code).
#ifdef STBI_NO_STDIO
  #ifndef STBI_NO_WRITE
    #define STBI_NO_WRITE
  #endif
#endif
104
 
105
#ifndef STBI_NO_DDS
106
#include "stbi_DDS_aug.h"
107
#endif
108
 
109
// I (JLD) want full messages for SOIL: select the user-friendly failure
// strings. Guarded so a definition supplied by the build is not redefined.
#ifndef STBI_FAILURE_USERMSG
#define STBI_FAILURE_USERMSG 1
#endif
111
 
112
//////////////////////////////////////////////////////////////////////////////
113
//
114
// Generic API that works on all image types
115
//
116
 
117
// Message recorded by the most recent failure, or NULL if nothing has failed
// yet. It is one shared global, so this is not threadsafe.
static const char *failure_reason;

// Returns the message recorded by the most recent failure (NULL if none).
// The callers visible in this file only ever record string literals, so the
// result must be treated as read-only.
char *stbi_failure_reason(void)
{
   // the function has always returned a non-const pointer; keep that type
   return (char *) failure_reason;
}

// Records 'str' as the failure reason. Always returns 0, so a caller can
// report an error with 'return e(...)'.
static int e(const char *str)
{
   failure_reason = str;
   return 0;
}
130
 
131
// Error-reporting macros. e(x,y) receives a terse message x and a longer
// user-facing message y; which one is recorded (or neither) is chosen at
// compile time. Every variant evaluates to 0.
#if defined(STBI_NO_FAILURE_STRINGS)
   #define e(x,y)  0
#elif !defined(STBI_FAILURE_USERMSG)
   #define e(x,y)  e(x)
#else
   #define e(x,y)  e(y)
#endif

// Same as e(x,y), but the result is a NULL pointer of the type returned by
// the float and unsigned-char loaders respectively.
#define epf(x,y)   ((float *) (e(x,y)?NULL:NULL))
#define epuc(x,y)  ((unsigned char *) (e(x,y)?NULL:NULL))
141
 
142
// Releases a buffer obtained from one of the stbi load functions; the
// pointer is handed straight to free().
void stbi_image_free(void *retval_from_stbi_load)
{
   void *pixels = retval_from_stbi_load;
   free(pixels);
}
146
 
147
#define MAX_LOADERS  32
148
stbi_loader *loaders[MAX_LOADERS];
149
static int max_loaders = 0;
150
 
151
int stbi_register_loader(stbi_loader *loader)
152
{
153
   int i;
154
   for (i=0; i < MAX_LOADERS; ++i) {
155
      // already present?
156
      if (loaders[i] == loader)
157
         return 1;
158
      // end of the list?
159
      if (loaders[i] == NULL) {
160
         loaders[i] = loader;
161
         max_loaders = i+1;
162
         return 1;
163
      }
164
   }
165
   // no room for it
166
   return 0;
167
}
168
 
169
#ifndef STBI_NO_HDR
170
static float   *ldr_to_hdr(stbi_uc *data, int x, int y, int comp);
171
static stbi_uc *hdr_to_ldr(float   *data, int x, int y, int comp);
172
#endif
173
 
174
#ifndef STBI_NO_STDIO
175
// Opens 'filename' in binary mode and decodes it with stbi_load_from_file.
// Returns the decoded pixels, or NULL (with a failure reason recorded) if the
// file cannot be opened or decoding fails.
unsigned char *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp)
{
   unsigned char *pixels;
   FILE *fp = fopen(filename, "rb");
   if (fp == NULL)
      return epuc("can't fopen", "Unable to open file");
   pixels = stbi_load_from_file(fp,x,y,comp,req_comp);
   fclose(fp);
   return pixels;
}
184
 
185
unsigned char *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
186
{
187
   int i;
188
   if (stbi_jpeg_test_file(f))
189
      return stbi_jpeg_load_from_file(f,x,y,comp,req_comp);
190
   if (stbi_png_test_file(f))
191
      return stbi_png_load_from_file(f,x,y,comp,req_comp);
192
   if (stbi_bmp_test_file(f))
193
      return stbi_bmp_load_from_file(f,x,y,comp,req_comp);
194
   if (stbi_psd_test_file(f))
195
      return stbi_psd_load_from_file(f,x,y,comp,req_comp);
196
   #ifndef STBI_NO_DDS
197
   if (stbi_dds_test_file(f))
198
      return stbi_dds_load_from_file(f,x,y,comp,req_comp);
199
   #endif
200
   #ifndef STBI_NO_HDR
201
   if (stbi_hdr_test_file(f)) {
202
      float *hdr = stbi_hdr_load_from_file(f, x,y,comp,req_comp);
203
      return hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
204
   }
205
   #endif
206
   for (i=0; i < max_loaders; ++i)
207
      if (loaders[i]->test_file(f))
208
         return loaders[i]->load_from_file(f,x,y,comp,req_comp);
209
   // test tga last because it's a crappy test!
210
   if (stbi_tga_test_file(f))
211
      return stbi_tga_load_from_file(f,x,y,comp,req_comp);
212
   return epuc("unknown image type", "Image not of any known type, or corrupt");
213
}
214
#endif
215
 
216
// Decodes an image held in memory ('len' bytes starting at 'buffer'). Each
// built-in format is probed in turn, then the user-registered loaders, and
// TGA last. The first format whose test accepts the data does the decoding.
// HDR images are converted to 8-bit data with hdr_to_ldr.
// Returns the decoded pixels, or NULL if no format matches or decoding fails.
unsigned char *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
{
   int i;
   if (stbi_jpeg_test_memory(buffer,len))
      return stbi_jpeg_load_from_memory(buffer,len,x,y,comp,req_comp);
   if (stbi_png_test_memory(buffer,len))
      return stbi_png_load_from_memory(buffer,len,x,y,comp,req_comp);
   if (stbi_bmp_test_memory(buffer,len))
      return stbi_bmp_load_from_memory(buffer,len,x,y,comp,req_comp);
   if (stbi_psd_test_memory(buffer,len))
      return stbi_psd_load_from_memory(buffer,len,x,y,comp,req_comp);
   #ifndef STBI_NO_DDS
   if (stbi_dds_test_memory(buffer,len))
      return stbi_dds_load_from_memory(buffer,len,x,y,comp,req_comp);
   #endif
   #ifndef STBI_NO_HDR
   if (stbi_hdr_test_memory(buffer, len)) {
      float *hdr = stbi_hdr_load_from_memory(buffer, len,x,y,comp,req_comp);
      // a failed decode must not reach hdr_to_ldr: the pixel pointer is NULL
      // and *x, *y, *comp may never have been written
      if (hdr == NULL)
         return NULL;
      return hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
   }
   #endif
   for (i=0; i < max_loaders; ++i)
      if (loaders[i]->test_memory(buffer,len))
         return loaders[i]->load_from_memory(buffer,len,x,y,comp,req_comp);
   // test tga last because it's a crappy test!
   if (stbi_tga_test_memory(buffer,len))
      return stbi_tga_load_from_memory(buffer,len,x,y,comp,req_comp);
   return epuc("unknown image type", "Image not of any known type, or corrupt");
}
245
 
246
#ifndef STBI_NO_HDR
247
 
248
#ifndef STBI_NO_STDIO
249
// Float-returning counterpart of stbi_load: opens 'filename' in binary mode
// and decodes it with stbi_loadf_from_file. Returns NULL (with a failure
// reason recorded) if the file cannot be opened or decoding fails.
float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp)
{
   float *pixels;
   FILE *fp = fopen(filename, "rb");
   if (fp == NULL)
      return epf("can't fopen", "Unable to open file");
   pixels = stbi_loadf_from_file(fp,x,y,comp,req_comp);
   fclose(fp);
   return pixels;
}
258
 
259
// Decodes an image from an open stdio stream into float data. HDR files are
// decoded directly; anything else is decoded as 8-bit data and converted
// with ldr_to_hdr. Returns NULL if decoding fails.
float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
{
   unsigned char *ldr;
   #ifndef STBI_NO_HDR
   if (stbi_hdr_test_file(f))
      return stbi_hdr_load_from_file(f,x,y,comp,req_comp);
   #endif
   ldr = stbi_load_from_file(f, x, y, comp, req_comp);
   if (ldr == NULL)
      return epf("unknown image type", "Image not of any known type, or corrupt");
   // ldr_to_hdr frees 'ldr' and returns a newly allocated float buffer
   return ldr_to_hdr(ldr, *x, *y, req_comp ? req_comp : *comp);
}
271
#endif
272
 
273
// Decodes an image held in memory into float data. HDR data is decoded
// directly; anything else is decoded as 8-bit data and converted with
// ldr_to_hdr. Returns NULL if decoding fails.
float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
{
   stbi_uc *ldr;
   #ifndef STBI_NO_HDR
   if (stbi_hdr_test_memory(buffer, len))
      return stbi_hdr_load_from_memory(buffer, len,x,y,comp,req_comp);
   #endif
   ldr = stbi_load_from_memory(buffer, len, x, y, comp, req_comp);
   if (ldr == NULL)
      return epf("unknown image type", "Image not of any known type, or corrupt");
   // ldr_to_hdr frees 'ldr' and returns a newly allocated float buffer
   return ldr_to_hdr(ldr, *x, *y, req_comp ? req_comp : *comp);
}
285
#endif
286
 
287
// the is-hdr-or-not functions are defined regardless of whether STBI_NO_HDR
// is defined, for API simplicity; if STBI_NO_HDR is defined, they always
// report false!
290
 
291
// Reports whether the in-memory data passes the HDR format test.
// Always 0 when HDR support is compiled out.
int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)
{
   #ifdef STBI_NO_HDR
   return 0;
   #else
   return stbi_hdr_test_memory(buffer, len);
   #endif
}
299
 
300
#ifndef STBI_NO_STDIO
301
// Reports whether the named file passes the HDR format test.
// Returns 0 if the file cannot be opened.
extern int      stbi_is_hdr          (char const *filename)
{
   int is_hdr;
   FILE *fp = fopen(filename, "rb");
   if (fp == NULL)
      return 0;
   is_hdr = stbi_is_hdr_from_file(fp);
   fclose(fp);
   return is_hdr;
}
311
 
312
// Reports whether the open stream passes the HDR format test.
// Always 0 when HDR support is compiled out.
extern int      stbi_is_hdr_from_file(FILE *f)
{
   #ifdef STBI_NO_HDR
   return 0;
   #else
   return stbi_hdr_test_file(f);
   #endif
}
320
 
321
#endif
322
 
323
// @TODO: get image dimensions & components without fully decoding
324
#ifndef STBI_NO_STDIO
325
extern int      stbi_info            (char const *filename,           int *x, int *y, int *comp);
326
extern int      stbi_info_from_file  (FILE *f,                  int *x, int *y, int *comp);
327
#endif
328
extern int      stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
329
 
330
#ifndef STBI_NO_HDR
331
// HDR->LDR conversion settings, stored as reciprocals (hence the _i suffix)
static float h2l_gamma_i=1.0f/2.2f;
static float h2l_scale_i=1.0f;
// LDR->HDR conversion settings, stored as given
static float l2h_gamma=2.2f;
static float l2h_scale=1.0f;

// set the gamma used by hdr_to_ldr; the reciprocal is what gets stored
void   stbi_hdr_to_ldr_gamma(float gamma)
{
   h2l_gamma_i = 1/gamma;
}

// set the scale used by hdr_to_ldr; the reciprocal is what gets stored
void   stbi_hdr_to_ldr_scale(float scale)
{
   h2l_scale_i = 1/scale;
}

// set the gamma used by ldr_to_hdr
void   stbi_ldr_to_hdr_gamma(float gamma)
{
   l2h_gamma = gamma;
}

// set the scale used by ldr_to_hdr
void   stbi_ldr_to_hdr_scale(float scale)
{
   l2h_scale = scale;
}
339
#endif
340
 
341
 
342
//////////////////////////////////////////////////////////////////////////////
343
//
344
// Common code used by all image loaders
345
//
346
 
347
// scan modes (presumably how much of an image a loader should process --
// confirm against the loaders that take these values)
enum
{
   SCAN_load   = 0,
   SCAN_type   = 1,
   SCAN_header = 2
};
353
 
354
typedef struct
355
{
356
   uint32 img_x, img_y;
357
   int img_n, img_out_n;
358
 
359
   #ifndef STBI_NO_STDIO
360
   FILE  *img_file;
361
   #endif
362
   uint8 *img_buffer, *img_buffer_end;
363
} stbi;
364
 
365
#ifndef STBI_NO_STDIO
366
// Points the reader at an open stdio stream. Only img_file is written; the
// memory-buffer fields are left untouched.
static void start_file(stbi *s, FILE *f)
{
   FILE *stream = f;
   s->img_file = stream;
}
370
#endif
371
 
372
// Points the reader at 'len' bytes of memory starting at 'buffer' and clears
// the file handle so that reads come from the buffer.
static void start_mem(stbi *s, uint8 const *buffer, int len)
{
   // the const is cast away only to fit the struct's pointer type
   uint8 *first = (uint8 *) buffer;
#ifndef STBI_NO_STDIO
   s->img_file = NULL;
#endif
   s->img_buffer = first;
   s->img_buffer_end = first+len;
}
380
 
381
// Reads the next byte from the file or memory source.
// Returns 0 once the source is exhausted.
__forceinline static int get8(stbi *s)
{
#ifndef STBI_NO_STDIO
   if (s->img_file != NULL) {
      int ch = fgetc(s->img_file);
      if (ch == EOF)
         return 0;
      return ch;
   }
#endif
   if (s->img_buffer >= s->img_buffer_end)
      return 0;
   return *s->img_buffer++;
}
393
 
394
// Nonzero when the source is exhausted: feof() for a stream, or the read
// cursor having reached the end pointer for a memory buffer.
__forceinline static int at_eof(stbi *s)
{
#ifndef STBI_NO_STDIO
   if (s->img_file != NULL)
      return feof(s->img_file);
#endif
   return (s->img_buffer >= s->img_buffer_end) ? 1 : 0;
}
402
 
403
// get8 with the result narrowed to an unsigned byte
__forceinline static uint8 get8u(stbi *s)
{
   int byte = get8(s);
   return (uint8) byte;
}
407
 
408
// Moves the read position by n bytes.
// For a stream this is a relative fseek. For a memory buffer a forward skip
// is clamped to the end of the buffer, so the cursor never points past the
// data (get8 then returns 0 and at_eof reports true).
static void skip(stbi *s, int n)
{
#ifndef STBI_NO_STDIO
   if (s->img_file) {
      fseek(s->img_file, n, SEEK_CUR);
      return;
   }
#endif
   if (n > 0 && n > s->img_buffer_end - s->img_buffer)
      s->img_buffer = s->img_buffer_end;
   else
      s->img_buffer += n;
}
417
 
418
// reads a 16-bit big-endian value (first byte is the high byte)
static int get16(stbi *s)
{
   int hi = get8(s);
   int lo = get8(s);
   return (hi << 8) + lo;
}
423
 
424
// reads a 32-bit big-endian value (first 16-bit half is the high half)
static uint32 get32(stbi *s)
{
   uint32 hi = get16(s);
   uint32 lo = get16(s);
   return (hi << 16) + lo;
}
429
 
430
// reads a 16-bit little-endian value (first byte is the low byte)
static int get16le(stbi *s)
{
   int lo = get8(s);
   int hi = get8(s);
   return lo + (hi << 8);
}
435
 
436
// Reads a 32-bit little-endian value (first 16-bit half is the low half).
// The high half is widened to uint32 before shifting: shifting the int
// returned by get16le would move a set bit 15 into the sign bit, which is
// undefined behavior.
static uint32 get32le(stbi *s)
{
   uint32 lo = get16le(s);
   uint32 hi = get16le(s);
   return lo + (hi << 16);
}
441
 
442
// Reads n bytes into 'buffer'.
// If the source runs out early, the rest of 'buffer' is zero-filled. This
// matches get8, which returns 0 at end of input, and keeps the memory path
// from reading beyond the end of the caller's data.
static void getn(stbi *s, stbi_uc *buffer, int n)
{
   size_t want, got;
   if (n <= 0) return;
   want = (size_t) n;
#ifndef STBI_NO_STDIO
   if (s->img_file) {
      got = fread(buffer, 1, want, s->img_file);
   } else
#endif
   {
      size_t avail = 0;
      if (s->img_buffer < s->img_buffer_end)
         avail = (size_t) (s->img_buffer_end - s->img_buffer);
      got = want < avail ? want : avail;
      memcpy(buffer, s->img_buffer, got);
      s->img_buffer += got;
   }
   // short read: pad with zeros instead of leaving stale bytes
   if (got < want)
      memset(buffer + got, 0, want - got);
}
453
 
454
//////////////////////////////////////////////////////////////////////////////
455
//
456
//  generic converter from built-in img_n to req_comp
457
//    individual types do this automatically as much as possible (e.g. jpeg
458
//    does all cases internally since it needs to colorspace convert anyway,
459
//    and it never has alpha, so very few cases ). png can automatically
460
//    interleave an alpha=255 channel, but falls back to this for other cases
461
//
462
//  assume data buffer is malloced, so malloc a new one and free that one
463
//  only failure mode is malloc failing
464
 
465
static uint8 compute_y(int r, int g, int b)
466
{
467
   return (uint8) (((r*77) + (g*150) +  (29*b)) >> 8);
468
}
469
 
470
// Converts an x-by-y image with img_n interleaved 8-bit components per pixel
// into one with req_comp components per pixel.
// If no conversion is needed, 'data' is returned unchanged. Otherwise a new
// buffer is allocated and returned, and 'data' is freed. On failure 'data'
// is freed and NULL is returned with a failure reason recorded.
static unsigned char *convert_format(unsigned char *data, int img_n, int req_comp, uint x, uint y)
{
   int i,j;
   unsigned char *good;

   if (req_comp == img_n) return data;
   assert(req_comp >= 1 && req_comp <= 4);

   // At most 4 bytes per pixel are read or written. Reject dimensions whose
   // byte count would wrap around rather than allocate a too-small buffer.
   if (x != 0 && y > ((size_t) -1) / 4 / x) {
      free(data);
      return epuc("outofmem", "Out of memory");
   }

   // size computed in size_t, not in 32-bit unsigned arithmetic
   good = (unsigned char *) malloc((size_t) req_comp * x * y);
   if (good == NULL) {
      free(data);
      return epuc("outofmem", "Out of memory");
   }

   for (j=0; j < (int) y; ++j) {
      unsigned char *src  = data + (size_t) j * x * img_n   ;
      unsigned char *dest = good + (size_t) j * x * req_comp;

      #define COMBO(a,b)  ((a)*8+(b))
      #define CASE(a,b)   case COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
      // convert source image with img_n components to one with req_comp components;
      // avoid switch per pixel, so use switch per scanline and massive macros
      switch(COMBO(img_n, req_comp)) {
         CASE(1,2) dest[0]=src[0], dest[1]=255; break;
         CASE(1,3) dest[0]=dest[1]=dest[2]=src[0]; break;
         CASE(1,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=255; break;
         CASE(2,1) dest[0]=src[0]; break;
         CASE(2,3) dest[0]=dest[1]=dest[2]=src[0]; break;
         CASE(2,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; break;
         CASE(3,4) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255; break;
         CASE(3,1) dest[0]=compute_y(src[0],src[1],src[2]); break;
         CASE(3,2) dest[0]=compute_y(src[0],src[1],src[2]), dest[1] = 255; break;
         CASE(4,1) dest[0]=compute_y(src[0],src[1],src[2]); break;
         CASE(4,2) dest[0]=compute_y(src[0],src[1],src[2]), dest[1] = src[3]; break;
         CASE(4,3) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; break;
         default:
            assert(0);
            // with asserts compiled out, fail cleanly instead of returning
            // a buffer that was never written
            free(data);
            free(good);
            return epuc("bad conversion", "Unsupported component conversion");
      }
      #undef CASE
   }

   free(data);
   return good;
}
513
 
514
#ifndef STBI_NO_HDR
515
// Converts 8-bit pixel data to float data.
// Color components are mapped to 0..1, raised to l2h_gamma and multiplied by
// l2h_scale. When 'comp' is even, the last component of each pixel is
// treated as alpha and only mapped linearly to 0..1.
// 'data' is always freed. Returns the new buffer, or NULL if out of memory.
static float   *ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
{
   int pixel,c,color_comps;
   float *output = (float *) malloc(x * y * comp * sizeof(float));
   if (output == NULL) {
      free(data);
      return epf("outofmem", "Out of memory");
   }
   // an even component count means the last component is alpha
   color_comps = (comp & 1) ? comp : comp-1;
   for (pixel=0; pixel < x*y; ++pixel) {
      stbi_uc *src = data + pixel*comp;
      float *dst = output + pixel*comp;
      for (c=0; c < color_comps; ++c)
         dst[c] = (float) pow(src[c]/255.0f, l2h_gamma) * l2h_scale;
      // alpha, when present, is not gamma-corrected or scaled
      if (c < comp)
         dst[c] = src[c]/255.0f;
   }
   free(data);
   return output;
}
531
 
532
#define float2int(x)   ((int) (x))
533
static stbi_uc *hdr_to_ldr(float   *data, int x, int y, int comp)
534
{
535
   int i,k,n;
536
   stbi_uc *output = (stbi_uc *) malloc(x * y * comp);
537
   if (output == NULL) { free(data); return epuc("outofmem", "Out of memory"); }
538
   // compute number of non-alpha components
539
   if (comp & 1) n = comp; else n = comp-1;
540
   for (i=0; i < x*y; ++i) {
541
      for (k=0; k < n; ++k) {
542
         float z = (float) pow(data[i*comp+k]*h2l_scale_i, h2l_gamma_i) * 255 + 0.5f;
543
         if (z < 0) z = 0;
544
         if (z > 255) z = 255;
545
         output[i*comp + k] = float2int(z);
546
      }
547
      if (k < comp) {
548
         float z = data[i*comp+k] * 255 + 0.5f;
549
         if (z < 0) z = 0;
550
         if (z > 255) z = 255;
551
         output[i*comp + k] = float2int(z);
552
      }
553
   }
554
   free(data);
555
   return output;
556
}
557
#endif
558
 
559
//////////////////////////////////////////////////////////////////////////////
560
//
561
//  "baseline" JPEG/JFIF decoder (not actually fully baseline implementation)
562
//
563
//    simple implementation
564
//      - channel subsampling of at most 2 in each dimension
565
//      - doesn't support delayed output of y-dimension
566
//      - simple interface (only one output format: 8-bit interleaved RGB)
567
//      - doesn't try to recover corrupt jpegs
568
//      - doesn't allow partial loading, loading multiple at once
569
//      - still fast on x86 (copying globals into locals doesn't help x86)
570
//      - allocates lots of intermediate memory (full size of all components)
571
//        - non-interleaved case requires this anyway
572
//        - allows good upsampling (see next)
573
//    high-quality
574
//      - upsampled channels are bilinearly interpolated, even across blocks
575
//      - quality integer IDCT derived from IJG's 'slow'
576
//    performance
577
//      - fast huffman; reasonable integer IDCT
578
//      - uses a lot of intermediate memory, could cache poorly
579
//      - load http://nothings.org/remote/anemones.jpg 3 times on 2.8Ghz P4
580
//          stb_jpeg:   1.34 seconds (MSVC6, default release build)
581
//          stb_jpeg:   1.06 seconds (MSVC6, processor = Pentium Pro)
582
//          IJL11.dll:  1.08 seconds (compiled by intel)
583
//          IJG 1998:   0.98 seconds (MSVC6, makefile provided by IJG)
584
//          IJG 1998:   0.95 seconds (MSVC6, makefile + proc=PPro)
585
 
586
// huffman decoding acceleration
587
#define FAST_BITS   9  // larger handles more cases; smaller stomps less cache
588
 
589
typedef struct
590
{
591
   uint8  fast[1 << FAST_BITS];
592
   // weirdly, repacking this into AoS is a 10% speed loss, instead of a win
593
   uint16 code[256];
594
   uint8  values[256];
595
   uint8  size[257];
596
   unsigned int maxcode[18];
597
   int    delta[17];   // old 'firstsymbol' - old 'firstcode'
598
} huffman;
599
 
600
typedef struct
601
{
602
   #if STBI_SIMD
603
   unsigned short dequant2[4][64];
604
   #endif
605
   stbi s;
606
   huffman huff_dc[4];
607
   huffman huff_ac[4];
608
   uint8 dequant[4][64];
609
 
610
// sizes for components, interleaved MCUs
611
   int img_h_max, img_v_max;
612
   int img_mcu_x, img_mcu_y;
613
   int img_mcu_w, img_mcu_h;
614
 
615
// definition of jpeg image component
616
   struct
617
   {
618
      int id;
619
      int h,v;
620
      int tq;
621
      int hd,ha;
622
      int dc_pred;
623
 
624
      int x,y,w2,h2;
625
      uint8 *data;
626
      void *raw_data;
627
      uint8 *linebuf;
628
   } img_comp[4];
629
 
630
   uint32         code_buffer; // jpeg entropy-coded buffer
631
   int            code_bits;   // number of valid bits
632
   unsigned char  marker;      // marker seen while filling entropy buffer
633
   int            nomore;      // flag if we saw a marker so must stop
634
 
635
   int scan_n, order[4];
636
   int restart_interval, todo;
637
} jpeg;
638
 
639
// Builds the decoding tables of 'h' from count[0..15], where count[i] is the
// number of codes of length i+1 bits.
// Fills size[], code[], delta[], maxcode[] and the fast-lookup table;
// values[] is not touched here.
// Returns 1 on success, 0 (with a failure reason recorded) if the counts do
// not describe a valid table.
static int build_huffman(huffman *h, int *count)
{
   int i,j,k=0,code;
   // build size list for each symbol (from JPEG spec)
   for (i=0; i < 16; ++i)
      for (j=0; j < count[i]; ++j) {
         // size[] holds at most 256 symbols plus the 0 terminator; corrupt
         // counts must not be allowed to write past it
         if (k >= 256) return e("bad code lengths","Corrupt JPEG");
         h->size[k++] = (uint8) (i+1);
      }
   h->size[k] = 0;

   // compute actual symbols (from jpeg spec)
   code = 0;
   k = 0;
   for(j=1; j <= 16; ++j) {
      // compute delta to add to code to compute symbol id
      h->delta[j] = k - code;
      if (h->size[k] == j) {
         while (h->size[k] == j)
            h->code[k++] = (uint16) (code++);
         if (code-1 >= (1 << j)) return e("bad code lengths","Corrupt JPEG");
      }
      // compute largest code + 1 for this size, preshifted as needed later
      h->maxcode[j] = code << (16-j);
      code <<= 1;
   }
   // sentinel so the length search in decode always terminates
   h->maxcode[j] = 0xffffffff;

   // build non-spec acceleration table; 255 is flag for not-accelerated
   memset(h->fast, 255, 1 << FAST_BITS);
   for (i=0; i < k; ++i) {
      int s = h->size[i];
      if (s <= FAST_BITS) {
         // every FAST_BITS-wide bit pattern that starts with this code
         int c = h->code[i] << (FAST_BITS-s);
         int m = 1 << (FAST_BITS-s);
         for (j=0; j < m; ++j) {
            h->fast[c+j] = (uint8) i;
         }
      }
   }
   return 1;
}
679
 
680
// Appends input bytes to the bit buffer until it holds more than 24 bits.
// A 0xff byte followed by a nonzero byte is a marker: it is recorded, the
// 'nomore' flag is set and filling stops at once. After that, zero bytes
// are fed in instead of real input.
static void grow_buffer_unsafe(jpeg *j)
{
   for (;;) {
      int byte = 0;
      if (!j->nomore)
         byte = get8(&j->s);
      if (byte == 0xff) {
         int next = get8(&j->s);
         if (next != 0) {
            j->marker = (unsigned char) next;
            j->nomore = 1;
            return;
         }
         // 0xff 0x00 stands for a literal 0xff data byte
      }
      j->code_buffer = (j->code_buffer << 8) | byte;
      j->code_bits += 8;
      if (j->code_bits > 24)
         break;
   }
}
696
 
697
// (1 << n) - 1
698
static uint32 bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535};
699
 
700
// decode a jpeg huffman value from the bitstream
701
__forceinline static int decode(jpeg *j, huffman *h)
702
{
703
   unsigned int temp;
704
   int c,k;
705
 
706
   if (j->code_bits < 16) grow_buffer_unsafe(j);
707
 
708
   // look at the top FAST_BITS and determine what symbol ID it is,
709
   // if the code is <= FAST_BITS
710
   c = (j->code_buffer >> (j->code_bits - FAST_BITS)) & ((1 << FAST_BITS)-1);
711
   k = h->fast[c];
712
   if (k < 255) {
713
      if (h->size[k] > j->code_bits)
714
         return -1;
715
      j->code_bits -= h->size[k];
716
      return h->values[k];
717
   }
718
 
719
   // naive test is to shift the code_buffer down so k bits are
720
   // valid, then test against maxcode. To speed this up, we've
721
   // preshifted maxcode left so that it has (16-k) 0s at the
722
   // end; in other words, regardless of the number of bits, it
723
   // wants to be compared against something shifted to have 16;
724
   // that way we don't need to shift inside the loop.
725
   if (j->code_bits < 16)
726
      temp = (j->code_buffer << (16 - j->code_bits)) & 0xffff;
727
   else
728
      temp = (j->code_buffer >> (j->code_bits - 16)) & 0xffff;
729
   for (k=FAST_BITS+1 ; ; ++k)
730
      if (temp < h->maxcode[k])
731
         break;
732
   if (k == 17) {
733
      // error! code not found
734
      j->code_bits -= 16;
735
      return -1;
736
   }
737
 
738
   if (k > j->code_bits)
739
      return -1;
740
 
741
   // convert the huffman code to the symbol id
742
   c = ((j->code_buffer >> (j->code_bits - k)) & bmask[k]) + h->delta[k];
743
   assert((((j->code_buffer) >> (j->code_bits - h->size[c])) & bmask[h->size[c]]) == h->code[c]);
744
 
745
   // convert the id to a symbol
746
   j->code_bits -= k;
747
   return h->values[c];
748
}
749
 
750
// combined JPEG 'receive' and JPEG 'extend', since baseline
751
// always extends everything it receives.
752
__forceinline static int extend_receive(jpeg *j, int n)
753
{
754
   unsigned int m = 1 << (n-1);
755
   unsigned int k;
756
   if (j->code_bits < n) grow_buffer_unsafe(j);
757
   k = (j->code_buffer >> (j->code_bits - n)) & bmask[n];
758
   j->code_bits -= n;
759
   // the following test is probably a random branch that won't
760
   // predict well. I tried to table accelerate it but failed.
761
   // maybe it's compiling as a conditional move?
762
   if (k < m)
763
      return (-1 << n) + k + 1;
764
   else
765
      return k;
766
}
767
 
768
// given a value that's at position X in the zigzag stream,
769
// where does it appear in the 8x8 matrix coded as row-major?
770
static uint8 dezigzag[64+15] =
771
{
772
    0,  1,  8, 16,  9,  2,  3, 10,
773
   17, 24, 32, 25, 18, 11,  4,  5,
774
   12, 19, 26, 33, 40, 48, 41, 34,
775
   27, 20, 13,  6,  7, 14, 21, 28,
776
   35, 42, 49, 56, 57, 50, 43, 36,
777
   29, 22, 15, 23, 30, 37, 44, 51,
778
   58, 59, 52, 45, 38, 31, 39, 46,
779
   53, 60, 61, 54, 47, 55, 62, 63,
780
   // let corrupt input sample past end
781
   63, 63, 63, 63, 63, 63, 63, 63,
782
   63, 63, 63, 63, 63, 63, 63
783
};
784
 
785
// decode one 64-entry block--
786
static int decode_block(jpeg *j, short data[64], huffman *hdc, huffman *hac, int b)
787
{
788
   int diff,dc,k;
789
   int t = decode(j, hdc);
790
   if (t < 0) return e("bad huffman code","Corrupt JPEG");
791
 
792
   // 0 all the ac values now so we can do it 32-bits at a time
793
   memset(data,0,64*sizeof(data[0]));
794
 
795
   diff = t ? extend_receive(j, t) : 0;
796
   dc = j->img_comp[b].dc_pred + diff;
797
   j->img_comp[b].dc_pred = dc;
798
   data[0] = (short) dc;
799
 
800
   // decode AC components, see JPEG spec
801
   k = 1;
802
   do {
803
      int r,s;
804
      int rs = decode(j, hac);
805
      if (rs < 0) return e("bad huffman code","Corrupt JPEG");
806
      s = rs & 15;
807
      r = rs >> 4;
808
      if (s == 0) {
809
         if (rs != 0xf0) break; // end block
810
         k += 16;
811
      } else {
812
         k += r;
813
         // decode into unzigzag'd location
814
         data[dezigzag[k++]] = (short) extend_receive(j,s);
815
      }
816
   } while (k < 64);
817
   return 1;
818
}
819
 
820
// take a -128..127 value and clamp it and convert to 0..255
821
__forceinline static uint8 clamp(int x)
822
{
823
   x += 128;
824
   // trick to use a single test to catch both cases
825
   if ((unsigned int) x > 255) {
826
      if (x < 0) return 0;
827
      if (x > 255) return 255;
828
   }
829
   return (uint8) x;
830
}
831
 
832
// f2f: converts a float constant to fixed point with 12 fractional bits
// fsh: scales an integer up by the same 12 bits
#define f2f(x)  ((int) (((x) * 4096 + 0.5)))
#define fsh(x)  ((x) << 12)

// derived from jidctint -- DCT_ISLOW
// One 1-D pass over eight inputs s0..s7. The macro declares its own locals
// in the enclosing scope and leaves its results in x0..x3 (computed from the
// even-indexed inputs) and t0..t3 (computed from the odd-indexed inputs);
// p1..p5 are scratch. The caller combines x and t into the eight outputs.
#define IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7)         \
   int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3;   \
   /* even-indexed inputs: s2 and s6 */          \
   p2 = (s2);                                    \
   p3 = (s6);                                    \
   p1 = (p2+p3) * f2f(0.5411961f);               \
   t2 = p1 + p3*f2f(-1.847759065f);              \
   t3 = p1 + p2*f2f( 0.765366865f);              \
   /* even-indexed inputs: s0 and s4 */          \
   p2 = (s0);                                    \
   p3 = (s4);                                    \
   t0 = fsh(p2+p3);                              \
   t1 = fsh(p2-p3);                              \
   x0 = t0+t3;                                   \
   x3 = t0-t3;                                   \
   x1 = t1+t2;                                   \
   x2 = t1-t2;                                   \
   /* odd-indexed inputs */                      \
   t0 = (s7);                                    \
   t1 = (s5);                                    \
   t2 = (s3);                                    \
   t3 = (s1);                                    \
   p3 = t0+t2;                                   \
   p4 = t1+t3;                                   \
   p1 = t0+t3;                                   \
   p2 = t1+t2;                                   \
   p5 = (p3+p4)*f2f( 1.175875602f);              \
   t0 = t0*f2f( 0.298631336f);                   \
   t1 = t1*f2f( 2.053119869f);                   \
   t2 = t2*f2f( 3.072711026f);                   \
   t3 = t3*f2f( 1.501321110f);                   \
   p1 = p5 + p1*f2f(-0.899976223f);              \
   p2 = p5 + p2*f2f(-2.562915447f);              \
   p3 = p3*f2f(-1.961570560f);                   \
   p4 = p4*f2f(-0.390180644f);                   \
   t3 += p1+p4;                                  \
   t2 += p2+p3;                                  \
   t1 += p2+p4;                                  \
   t0 += p1+p3;
872
 
873
#if !STBI_SIMD
874
// .344 seconds on 3*anemones.jpg
875
static void idct_block(uint8 *out, int out_stride, short data[64], uint8 *dequantize)
876
{
877
   int i,val[64],*v=val;
878
   uint8 *o,*dq = dequantize;
879
   short *d = data;
880
 
881
   // columns
882
   for (i=0; i < 8; ++i,++d,++dq, ++v) {
883
      // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
884
      if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
885
           && d[40]==0 && d[48]==0 && d[56]==0) {
886
         //    no shortcut                 0     seconds
887
         //    (1|2|3|4|5|6|7)==0          0     seconds
888
         //    all separate               -0.047 seconds
889
         //    1 && 2|3 && 4|5 && 6|7:    -0.047 seconds
890
         int dcterm = d[0] * dq[0] << 2;
891
         v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
892
      } else {
893
         IDCT_1D(d[ 0]*dq[ 0],d[ 8]*dq[ 8],d[16]*dq[16],d[24]*dq[24],
894
                 d[32]*dq[32],d[40]*dq[40],d[48]*dq[48],d[56]*dq[56])
895
         // constants scaled things up by 1<<12; let's bring them back
896
         // down, but keep 2 extra bits of precision
897
         x0 += 512; x1 += 512; x2 += 512; x3 += 512;
898
         v[ 0] = (x0+t3) >> 10;
899
         v[56] = (x0-t3) >> 10;
900
         v[ 8] = (x1+t2) >> 10;
901
         v[48] = (x1-t2) >> 10;
902
         v[16] = (x2+t1) >> 10;
903
         v[40] = (x2-t1) >> 10;
904
         v[24] = (x3+t0) >> 10;
905
         v[32] = (x3-t0) >> 10;
906
      }
907
   }
908
 
909
   for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) {
910
      // no fast case since the first 1D IDCT spread components out
911
      IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7])
912
      // constants scaled things up by 1<<12, plus we had 1<<2 from first
913
      // loop, plus horizontal and vertical each scale by sqrt(8) so together
914
      // we've got an extra 1<<3, so 1<<17 total we need to remove.
915
      x0 += 65536; x1 += 65536; x2 += 65536; x3 += 65536;
916
      o[0] = clamp((x0+t3) >> 17);
917
      o[7] = clamp((x0-t3) >> 17);
918
      o[1] = clamp((x1+t2) >> 17);
919
      o[6] = clamp((x1-t2) >> 17);
920
      o[2] = clamp((x2+t1) >> 17);
921
      o[5] = clamp((x2-t1) >> 17);
922
      o[3] = clamp((x3+t0) >> 17);
923
      o[4] = clamp((x3-t0) >> 17);
924
   }
925
}
926
#else
927
static void idct_block(uint8 *out, int out_stride, short data[64], unsigned short *dequantize)
928
{
929
   int i,val[64],*v=val;
930
   uint8 *o;
931
   unsigned short *dq = dequantize;
932
   short *d = data;
933
 
934
   // columns
935
   for (i=0; i < 8; ++i,++d,++dq, ++v) {
936
      // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
937
      if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
938
           && d[40]==0 && d[48]==0 && d[56]==0) {
939
         //    no shortcut                 0     seconds
940
         //    (1|2|3|4|5|6|7)==0          0     seconds
941
         //    all separate               -0.047 seconds
942
         //    1 && 2|3 && 4|5 && 6|7:    -0.047 seconds
943
         int dcterm = d[0] * dq[0] << 2;
944
         v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
945
      } else {
946
         IDCT_1D(d[ 0]*dq[ 0],d[ 8]*dq[ 8],d[16]*dq[16],d[24]*dq[24],
947
                 d[32]*dq[32],d[40]*dq[40],d[48]*dq[48],d[56]*dq[56])
948
         // constants scaled things up by 1<<12; let's bring them back
949
         // down, but keep 2 extra bits of precision
950
         x0 += 512; x1 += 512; x2 += 512; x3 += 512;
951
         v[ 0] = (x0+t3) >> 10;
952
         v[56] = (x0-t3) >> 10;
953
         v[ 8] = (x1+t2) >> 10;
954
         v[48] = (x1-t2) >> 10;
955
         v[16] = (x2+t1) >> 10;
956
         v[40] = (x2-t1) >> 10;
957
         v[24] = (x3+t0) >> 10;
958
         v[32] = (x3-t0) >> 10;
959
      }
960
   }
961
 
962
   for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) {
963
      // no fast case since the first 1D IDCT spread components out
964
      IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7])
965
      // constants scaled things up by 1<<12, plus we had 1<<2 from first
966
      // loop, plus horizontal and vertical each scale by sqrt(8) so together
967
      // we've got an extra 1<<3, so 1<<17 total we need to remove.
968
      x0 += 65536; x1 += 65536; x2 += 65536; x3 += 65536;
969
      o[0] = clamp((x0+t3) >> 17);
970
      o[7] = clamp((x0-t3) >> 17);
971
      o[1] = clamp((x1+t2) >> 17);
972
      o[6] = clamp((x1-t2) >> 17);
973
      o[2] = clamp((x2+t1) >> 17);
974
      o[5] = clamp((x2-t1) >> 17);
975
      o[3] = clamp((x3+t0) >> 17);
976
      o[4] = clamp((x3-t0) >> 17);
977
   }
978
}
979
static stbi_idct_8x8 stbi_idct_installed = idct_block;
980
 
981
extern void stbi_install_idct(stbi_idct_8x8 func)
982
{
983
   stbi_idct_installed = func;
984
}
985
#endif
986
 
987
#define MARKER_none  0xff
988
// if there's a pending marker from the entropy stream, return that
989
// otherwise, fetch from the stream and get a marker. if there's no
990
// marker, return 0xff, which is never a valid marker value
991
static uint8 get_marker(jpeg *j)
992
{
993
   uint8 x;
994
   if (j->marker != MARKER_none) { x = j->marker; j->marker = MARKER_none; return x; }
995
   x = get8u(&j->s);
996
   if (x != 0xff) return MARKER_none;
997
   while (x == 0xff)
998
      x = get8u(&j->s);
999
   return x;
1000
}
1001
 
1002
// in each scan, we'll have scan_n components, and the order
1003
// of the components is specified by order[]
1004
#define RESTART(x)     ((x) >= 0xd0 && (x) <= 0xd7)
1005
 
1006
// after a restart interval, reset the entropy decoder and
1007
// the dc prediction
1008
static void reset(jpeg *j)
1009
{
1010
   j->code_bits = 0;
1011
   j->code_buffer = 0;
1012
   j->nomore = 0;
1013
   j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = 0;
1014
   j->marker = MARKER_none;
1015
   j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;
1016
   // no more than 1<<31 MCUs if no restart_interal? that's plenty safe,
1017
   // since we don't even allow 1<<30 pixels
1018
}
1019
 
1020
// Decode the next 8x8 block of component n from the entropy-coded stream,
// run the IDCT on it and store the pixels in the component's plane with the
// block's top-left corner at column x, row y. Returns 0 if decode_block
// fails, 1 otherwise.
static int decode_and_store_block(jpeg *z, int n, int x, int y)
{
   // aligned in the SIMD build so an installed IDCT can rely on it, no
   // matter which scan layout the block came from
   #if STBI_SIMD
   __declspec(align(16))
   #endif
   short data[64];
   uint8 *dest = z->img_comp[n].data + z->img_comp[n].w2*y + x;

   if (!decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+z->img_comp[n].ha, n)) return 0;
   #if STBI_SIMD
   stbi_idct_installed(dest, z->img_comp[n].w2, data, z->dequant2[z->img_comp[n].tq]);
   #else
   idct_block(dest, z->img_comp[n].w2, data, z->dequant[z->img_comp[n].tq]);
   #endif
   return 1;
}

// Count one finished MCU against the restart interval. Returns 1 when
// decoding should carry on (interval not exhausted, or a restart marker was
// found and the decoder state was reset), 0 when the interval ran out and
// the pending marker is not a restart marker.
static int finish_mcu(jpeg *z)
{
   if (--z->todo > 0) return 1;
   if (z->code_bits < 24) grow_buffer_unsafe(z);
   if (!RESTART(z->marker)) return 0;
   reset(z);
   return 1;
}

// Decode the entropy-coded data of one scan into the component planes.
// Returns 0 if a block fails to decode, 1 otherwise. A missing restart
// marker ends the scan early but still returns 1, so the caller gets
// corrupt data rather than no data.
static int parse_entropy_coded_data(jpeg *z)
{
   int i,j;
   reset(z);
   if (z->scan_n == 1) {
      int n = z->order[0];
      // non-interleaved data, we just need to process one block at a time,
      // in trivial scanline order
      // number of blocks to do just depends on how many actual "pixels" this
      // component has, independent of interleaved MCU blocking and such
      int w = (z->img_comp[n].x+7) >> 3;
      int h = (z->img_comp[n].y+7) >> 3;
      for (j=0; j < h; ++j) {
         for (i=0; i < w; ++i) {
            if (!decode_and_store_block(z, n, i*8, j*8)) return 0;
            // every data block is an MCU, so countdown the restart interval
            if (!finish_mcu(z)) return 1;
         }
      }
   } else { // interleaved!
      int k,x,y;
      for (j=0; j < z->img_mcu_y; ++j) {
         for (i=0; i < z->img_mcu_x; ++i) {
            // scan an interleaved mcu... process scan_n components in order
            for (k=0; k < z->scan_n; ++k) {
               int n = z->order[k];
               // scan out an mcu's worth of this component; that's just determined
               // by the basic H and V specified for the component
               for (y=0; y < z->img_comp[n].v; ++y) {
                  for (x=0; x < z->img_comp[n].h; ++x) {
                     int x2 = (i*z->img_comp[n].h + x)*8;
                     int y2 = (j*z->img_comp[n].v + y)*8;
                     if (!decode_and_store_block(z, n, x2, y2)) return 0;
                  }
               }
            }
            // after all interleaved components, that's an interleaved MCU,
            // so now count down the restart interval
            if (!finish_mcu(z)) return 1;
         }
      }
   }
   return 1;
}
1091
 
1092
// Handle one marker segment that is neither a frame header nor a scan.
//   m: marker code as returned by get_marker
// Returns 1 if the segment was consumed, 0 on failure. DRI, DQT and DHT
// update the decoder tables, APPn and COM segments are skipped, progressive
// SOF is rejected, and any other marker fails without a specific message.
static int process_marker(jpeg *z, int m)
{
   int L;
   switch (m) {
      case MARKER_none: // no marker found
         return e("expected marker","Corrupt JPEG");

      case 0xC2: // SOF - progressive
         return e("progressive jpeg","JPEG format not supported (progressive)");

      case 0xDD: // DRI - specify restart interval
         if (get16(&z->s) != 4) return e("bad DRI len","Corrupt JPEG");
         z->restart_interval = get16(&z->s);
         return 1;

      case 0xDB: // DQT - define quantization table
         L = get16(&z->s)-2;
         while (L > 0) {
            int q = get8(&z->s);
            int p = q >> 4;      // high nibble must be 0 here
            int t = q & 15,i;    // destination table index
            if (p != 0) return e("bad DQT type","Corrupt JPEG");
            if (t > 3) return e("bad DQT table","Corrupt JPEG");
            // entries arrive in zigzag order; store them de-zigzagged
            for (i=0; i < 64; ++i)
               z->dequant[t][dezigzag[i]] = get8u(&z->s);
            #if STBI_SIMD
            // keep the table used by the installable IDCT in sync
            for (i=0; i < 64; ++i)
               z->dequant2[t][i] = z->dequant[t][i];
            #endif
            L -= 65;
         }
         return L==0;

      case 0xC4: // DHT - define huffman table
         L = get16(&z->s)-2;
         while (L > 0) {
            uint8 *v;
            int sizes[16],i,total=0;
            int q = get8(&z->s);
            int tc = q >> 4;     // 0 selects a DC table, 1 an AC table
            int th = q & 15;     // destination table index
            if (tc > 1 || th > 3) return e("bad DHT header","Corrupt JPEG");
            for (i=0; i < 16; ++i) {
               sizes[i] = get8(&z->s);
               total += sizes[i];
            }
            // symbols are single bytes, so a table can define at most 256
            // of them; more would run the copy loop below off the end of
            // the values array (assumed to hold 256 entries -- TODO confirm
            // against the huffman struct)
            if (total > 256) return e("bad DHT header","Corrupt JPEG");
            L -= 17;
            if (tc == 0) {
               if (!build_huffman(z->huff_dc+th, sizes)) return 0;
               v = z->huff_dc[th].values;
            } else {
               if (!build_huffman(z->huff_ac+th, sizes)) return 0;
               v = z->huff_ac[th].values;
            }
            for (i=0; i < total; ++i)
               v[i] = get8u(&z->s);
            L -= total;
         }
         return L==0;

      default:
         break;
   }
   // check for comment block or APP blocks
   if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
      L = get16(&z->s);
      // the length counts its own two bytes; anything smaller would turn
      // into a negative skip
      if (L < 2) {
         if (m == 0xFE) return e("bad COM len","Corrupt JPEG");
         return e("bad APP len","Corrupt JPEG");
      }
      skip(&z->s, L-2);
      return 1;
   }
   return 0;
}
1159
 
1160
// after we see SOS
1161
static int process_scan_header(jpeg *z)
1162
{
1163
   int i;
1164
   int Ls = get16(&z->s);
1165
   z->scan_n = get8(&z->s);
1166
   if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s.img_n) return e("bad SOS component count","Corrupt JPEG");
1167
   if (Ls != 6+2*z->scan_n) return e("bad SOS len","Corrupt JPEG");
1168
   for (i=0; i < z->scan_n; ++i) {
1169
      int id = get8(&z->s), which;
1170
      int q = get8(&z->s);
1171
      for (which = 0; which < z->s.img_n; ++which)
1172
         if (z->img_comp[which].id == id)
1173
            break;
1174
      if (which == z->s.img_n) return 0;
1175
      z->img_comp[which].hd = q >> 4;   if (z->img_comp[which].hd > 3) return e("bad DC huff","Corrupt JPEG");
1176
      z->img_comp[which].ha = q & 15;   if (z->img_comp[which].ha > 3) return e("bad AC huff","Corrupt JPEG");
1177
      z->order[i] = which;
1178
   }
1179
   if (get8(&z->s) != 0) return e("bad SOS","Corrupt JPEG");
1180
   get8(&z->s); // should be 63, but might be 0
1181
   if (get8(&z->s) != 0) return e("bad SOS","Corrupt JPEG");
1182
 
1183
   return 1;
1184
}
1185
 
1186
// Parse the frame header (SOF) and, when scan == SCAN_load, size and
// allocate the per-component sample planes.
// Returns 1 on success, 0 on a malformed/unsupported header or when an
// allocation fails (planes allocated so far are released first).
static int process_frame_header(jpeg *z, int scan)
{
   stbi *s = &z->s;
   int Lf,precision,i,hv, h_max=1,v_max=1,ncomp;

   Lf = get16(s);
   if (Lf < 11) return e("bad SOF len","Corrupt JPEG"); // JPEG
   precision = get8(s);
   if (precision != 8) return e("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline
   s->img_y = get16(s);
   if (s->img_y == 0) return e("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG
   s->img_x = get16(s);
   if (s->img_x == 0) return e("0 width","Corrupt JPEG"); // JPEG requires
   ncomp = get8(s);
   if (ncomp != 3 && ncomp != 1) return e("bad component count","Corrupt JPEG");    // JFIF requires
   s->img_n = ncomp;

   // start with no buffers so cleanup is safe whatever happens next
   for (i=0; i < ncomp; ++i) {
      z->img_comp[i].data = NULL;
      z->img_comp[i].linebuf = NULL;
   }

   if (Lf != 8+3*s->img_n) return e("bad SOF len","Corrupt JPEG");

   for (i=0; i < s->img_n; ++i) {
      z->img_comp[i].id = get8(s);
      // JFIF requires ids 1..n, but some version of jpegtran outputs
      // non-JFIF-compliant files numbered 0..n-1
      if (z->img_comp[i].id != i+1 && z->img_comp[i].id != i)
         return e("bad component ID","Corrupt JPEG");
      hv = get8(s);
      z->img_comp[i].h = (hv >> 4);
      if (!z->img_comp[i].h || z->img_comp[i].h > 4) return e("bad H","Corrupt JPEG");
      z->img_comp[i].v = hv & 15;
      if (!z->img_comp[i].v || z->img_comp[i].v > 4) return e("bad V","Corrupt JPEG");
      z->img_comp[i].tq = get8(s);
      if (z->img_comp[i].tq > 3) return e("bad TQ","Corrupt JPEG");
   }

   // anything short of a full load only needed the header fields
   if (scan != SCAN_load) return 1;

   if ((1 << 30) / s->img_x / s->img_n < s->img_y) return e("too large", "Image too large to decode");

   // largest sampling factors across all components
   for (i=0; i < s->img_n; ++i) {
      if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h;
      if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v;
   }

   // compute interleaved mcu info
   z->img_h_max = h_max;
   z->img_v_max = v_max;
   z->img_mcu_w = h_max * 8;
   z->img_mcu_h = v_max * 8;
   z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w;
   z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h;

   for (i=0; i < s->img_n; ++i) {
      // number of effective pixels (e.g. for non-interleaved MCU)
      z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max;
      z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max;
      // to simplify generation, we'll allocate enough memory to decode
      // the bogus oversized data from using interleaved MCUs and their
      // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
      // discard the extra data until colorspace conversion
      z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
      z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
      // 15 spare bytes leave room to round the start up to 16 bytes
      z->img_comp[i].raw_data = malloc(z->img_comp[i].w2 * z->img_comp[i].h2+15);
      if (z->img_comp[i].raw_data == NULL) {
         // release the planes of the components already set up
         while (--i >= 0) {
            free(z->img_comp[i].raw_data);
            z->img_comp[i].data = NULL;
         }
         return e("outofmem", "Out of memory");
      }
      // align blocks for installable-idct using mmx/sse
      z->img_comp[i].data = (uint8*) (((size_t) z->img_comp[i].raw_data + 15) & ~15);
      z->img_comp[i].linebuf = NULL;
   }

   return 1;
}
1257
 
1258
// use comparisons since in some cases we handle more than one case (e.g. SOF)
1259
#define DNL(x)         ((x) == 0xdc)
1260
#define SOI(x)         ((x) == 0xd8)
1261
#define EOI(x)         ((x) == 0xd9)
1262
#define SOF(x)         ((x) == 0xc0 || (x) == 0xc1)
1263
#define SOS(x)         ((x) == 0xda)
1264
 
1265
static int decode_jpeg_header(jpeg *z, int scan)
1266
{
1267
   int m;
1268
   z->marker = MARKER_none; // initialize cached marker to empty
1269
   m = get_marker(z);
1270
   if (!SOI(m)) return e("no SOI","Corrupt JPEG");
1271
   if (scan == SCAN_type) return 1;
1272
   m = get_marker(z);
1273
   while (!SOF(m)) {
1274
      if (!process_marker(z,m)) return 0;
1275
      m = get_marker(z);
1276
      while (m == MARKER_none) {
1277
         // some files have extra padding after their blocks, so ok, we'll scan
1278
         if (at_eof(&z->s)) return e("no SOF", "Corrupt JPEG");
1279
         m = get_marker(z);
1280
      }
1281
   }
1282
   if (!process_frame_header(z, scan)) return 0;
1283
   return 1;
1284
}
1285
 
1286
// Decode a whole JPEG stream into the per-component planes: header first,
// then every segment up to the EOI marker. SOS segments are decoded as
// scans, everything else goes to process_marker.
// Returns 1 on success, 0 as soon as any step fails.
static int decode_jpeg_image(jpeg *j)
{
   int m;

   j->restart_interval = 0;
   if (!decode_jpeg_header(j, SCAN_load)) return 0;

   for (m = get_marker(j); !EOI(m); m = get_marker(j)) {
      if (!SOS(m)) {
         if (!process_marker(j, m)) return 0;
         continue;
      }
      if (!process_scan_header(j)) return 0;
      if (!parse_entropy_coded_data(j)) return 0;
   }
   return 1;
}
1303
 
1304
// static jfif-centered resampling (across block boundaries)
1305
 
1306
typedef uint8 *(*resample_row_func)(uint8 *out, uint8 *in0, uint8 *in1,
1307
                                    int w, int hs);
1308
 
1309
#define div4(x) ((uint8) ((x) >> 2))
1310
 
1311
static uint8 *resample_row_1(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs)
1312
{
1313
   return in_near;
1314
}
1315
 
1316
// Vertical 2x upsampling: two output rows are generated for every input
// row, and each call blends one of them as (3*near + far + 2) / 4.
static uint8* resample_row_v_2(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs)
{
   int x = 0;
   while (x < w) {
      out[x] = div4(3*in_near[x] + in_far[x] + 2);
      ++x;
   }
   return out;
}
1324
 
1325
// Horizontal 2x upsampling: every input sample of in_near yields two
// output samples, each weighted 3:1 towards the sample it sits on. The two
// outermost outputs copy the edge samples unchanged. in_far is not used.
static uint8*  resample_row_h_2(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs)
{
   int i;
   uint8 *src = in_near;

   if (w == 1) {
      // if only one sample, can't do any interpolation
      out[0] = src[0];
      out[1] = src[0];
      return out;
   }

   // left edge
   out[0] = src[0];
   out[1] = div4(src[0]*3 + src[1] + 2);

   // interior samples have a neighbour on both sides
   for (i=1; i < w-1; ++i) {
      int centre = 3*src[i]+2;
      out[i*2+0] = div4(centre+src[i-1]);
      out[i*2+1] = div4(centre+src[i+1]);
   }

   // right edge (i is left at the last sample by the loop above)
   out[i*2+0] = div4(src[w-2]*3 + src[w-1] + 2);
   out[i*2+1] = src[w-1];
   return out;
}
1347
 
1348
#define div16(x) ((uint8) ((x) >> 4))
1349
 
1350
static uint8 *resample_row_hv_2(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs)
1351
{
1352
   // need to generate 2x2 samples for every one in input
1353
   int i,t0,t1;
1354
   if (w == 1) {
1355
      out[0] = out[1] = div4(3*in_near[0] + in_far[0] + 2);
1356
      return out;
1357
   }
1358
 
1359
   t1 = 3*in_near[0] + in_far[0];
1360
   out[0] = div4(t1+2);
1361
   for (i=1; i < w; ++i) {
1362
      t0 = t1;
1363
      t1 = 3*in_near[i]+in_far[i];
1364
      out[i*2-1] = div16(3*t0 + t1 + 8);
1365
      out[i*2  ] = div16(3*t1 + t0 + 8);
1366
   }
1367
   out[w*2-1] = div4(t1+2);
1368
   return out;
1369
}
1370
 
1371
// Fallback for any other expansion factor: nearest-neighbour resampling,
// i.e. every sample of in_near is repeated hs times. in_far is not used.
static uint8 *resample_row_generic(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs)
{
   int i,rep;
   uint8 *dst = out;

   for (i=0; i < w; ++i) {
      for (rep=0; rep < hs; ++rep)
         *dst++ = in_near[i];
   }
   return out;
}
1380
 
1381
#define float2fixed(x)  ((int) ((x) * 65536 + 0.5))
1382
 
1383
// 0.38 seconds on 3*anemones.jpg   (0.25 with processor = Pro)
1384
// VC6 without processor=Pro is generating multiple LEAs per multiply!
1385
static void YCbCr_to_RGB_row(uint8 *out, uint8 *y, uint8 *pcb, uint8 *pcr, int count, int step)
1386
{
1387
   int i;
1388
   for (i=0; i < count; ++i) {
1389
      int y_fixed = (y[i] << 16) + 32768; // rounding
1390
      int r,g,b;
1391
      int cr = pcr[i] - 128;
1392
      int cb = pcb[i] - 128;
1393
      r = y_fixed + cr*float2fixed(1.40200f);
1394
      g = y_fixed - cr*float2fixed(0.71414f) - cb*float2fixed(0.34414f);
1395
      b = y_fixed                            + cb*float2fixed(1.77200f);
1396
      r >>= 16;
1397
      g >>= 16;
1398
      b >>= 16;
1399
      if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
1400
      if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
1401
      if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
1402
      out[0] = (uint8)r;
1403
      out[1] = (uint8)g;
1404
      out[2] = (uint8)b;
1405
      out[3] = 255;
1406
      out += step;
1407
   }
1408
}
1409
 
1410
#if STBI_SIMD
// colorspace conversion routine used in the SIMD build; starts out as the
// plain C implementation
static stbi_YCbCr_to_RGB_run stbi_YCbCr_installed = YCbCr_to_RGB_row;

// Replace the YCbCr-to-RGB row converter used for all subsequent loads.
void stbi_install_YCbCr_to_RGB(stbi_YCbCr_to_RGB_run func)
{
   stbi_YCbCr_installed = func;
}
#endif
1418
 
1419
 
1420
// clean up the temporary component buffers
1421
static void cleanup_jpeg(jpeg *j)
1422
{
1423
   int i;
1424
   for (i=0; i < j->s.img_n; ++i) {
1425
      if (j->img_comp[i].data) {
1426
         free(j->img_comp[i].raw_data);
1427
         j->img_comp[i].data = NULL;
1428
      }
1429
      if (j->img_comp[i].linebuf) {
1430
         free(j->img_comp[i].linebuf);
1431
         j->img_comp[i].linebuf = NULL;
1432
      }
1433
   }
1434
}
1435
 
1436
typedef struct
1437
{
1438
   resample_row_func resample;
1439
   uint8 *line0,*line1;
1440
   int hs,vs;   // expansion factor in each axis
1441
   int w_lores; // horizontal pixels pre-expansion
1442
   int ystep;   // how far through vertical expansion we are
1443
   int ypos;    // which pre-expansion row we're on
1444
} stbi_resample;
1445
 
1446
// Decode the JPEG described by z and return a newly allocated image.
//   out_x, out_y  receive the image width and height
//   comp          if non-NULL, receives the number of components in the file
//   req_comp      0 to keep the file's component count, or 1..4 to force
//                 that many components per output pixel
// Returns the pixel data (req_comp, or the file's count, bytes per pixel;
// the caller owns it), or NULL on failure. All temporary decoder buffers
// are released before returning either way.
static uint8 *load_jpeg_image(jpeg *z, int *out_x, int *out_y, int *comp, int req_comp)
{
   int n, decode_n;
   // validate req_comp
   if (req_comp < 0 || req_comp > 4) return epuc("bad req_comp", "Internal error");
   // no components yet, so cleanup_jpeg has nothing to walk if decoding
   // fails before the frame header is parsed
   z->s.img_n = 0;

   // load a jpeg image from whichever source
   if (!decode_jpeg_image(z)) { cleanup_jpeg(z); return NULL; }

   // determine actual number of components to generate
   n = req_comp ? req_comp : z->s.img_n;

   // grey output from a 3-component image only needs the first plane
   if (z->s.img_n == 3 && n < 3)
      decode_n = 1;
   else
      decode_n = z->s.img_n;

   // The frame header bounds width*height*file_components, but the output
   // may have more components than the file (e.g. 1 -> 4), so make sure
   // n*width*height+1 still fits before using it as an allocation size.
   if (z->s.img_y > (0x7fffffff - 1) / n / z->s.img_x) {
      cleanup_jpeg(z);
      return epuc("too large", "Image too large to decode");
   }

   // resample and color-convert
   {
      int k;
      uint i,j;
      uint8 *output;
      uint8 *coutput[4];

      stbi_resample res_comp[4];

      for (k=0; k < decode_n; ++k) {
         stbi_resample *r = &res_comp[k];

         // allocate line buffer big enough for upsampling off the edges
         // with upsample factor of 4
         z->img_comp[k].linebuf = (uint8 *) malloc(z->s.img_x + 3);
         if (!z->img_comp[k].linebuf) { cleanup_jpeg(z); return epuc("outofmem", "Out of memory"); }

         r->hs      = z->img_h_max / z->img_comp[k].h;
         r->vs      = z->img_v_max / z->img_comp[k].v;
         r->ystep   = r->vs >> 1;
         r->w_lores = (z->s.img_x + r->hs-1) / r->hs;
         r->ypos    = 0;
         r->line0   = r->line1 = z->img_comp[k].data;

         if      (r->hs == 1 && r->vs == 1) r->resample = resample_row_1;
         else if (r->hs == 1 && r->vs == 2) r->resample = resample_row_v_2;
         else if (r->hs == 2 && r->vs == 1) r->resample = resample_row_h_2;
         else if (r->hs == 2 && r->vs == 2) r->resample = resample_row_hv_2;
         else                               r->resample = resample_row_generic;
      }

      // last allocation; nothing after this point can fail.
      // the extra byte absorbs the 4th byte the color converter stores
      // for the final pixel when n == 3
      output = (uint8 *) malloc(n * z->s.img_x * z->s.img_y + 1);
      if (!output) { cleanup_jpeg(z); return epuc("outofmem", "Out of memory"); }

      // now go ahead and resample
      for (j=0; j < z->s.img_y; ++j) {
         uint8 *out = output + n * z->s.img_x * j;
         for (k=0; k < decode_n; ++k) {
            stbi_resample *r = &res_comp[k];
            // in the second half of a vertical expansion step the lower
            // source row is the nearer one
            int y_bot = r->ystep >= (r->vs >> 1);
            coutput[k] = r->resample(z->img_comp[k].linebuf,
                                     y_bot ? r->line1 : r->line0,
                                     y_bot ? r->line0 : r->line1,
                                     r->w_lores, r->hs);
            if (++r->ystep >= r->vs) {
               // move on to the next source row, but never past the last one
               r->ystep = 0;
               r->line0 = r->line1;
               if (++r->ypos < z->img_comp[k].y)
                  r->line1 += z->img_comp[k].w2;
            }
         }
         if (n >= 3) {
            uint8 *y = coutput[0];
            if (z->s.img_n == 3) {
               #if STBI_SIMD
               stbi_YCbCr_installed(out, y, coutput[1], coutput[2], z->s.img_x, n);
               #else
               YCbCr_to_RGB_row(out, y, coutput[1], coutput[2], z->s.img_x, n);
               #endif
            } else
               for (i=0; i < z->s.img_x; ++i) {
                  out[0] = out[1] = out[2] = y[i];
                  out[3] = 255; // not used if n==3
                  out += n;
               }
         } else {
            uint8 *y = coutput[0];
            if (n == 1)
               for (i=0; i < z->s.img_x; ++i) out[i] = y[i];
            else
               for (i=0; i < z->s.img_x; ++i) *out++ = y[i], *out++ = 255;
         }
      }
      cleanup_jpeg(z);
      *out_x = z->s.img_x;
      *out_y = z->s.img_y;
      if (comp) *comp  = z->s.img_n; // report original components, not output
      return output;
   }
}
1545
 
1546
#ifndef STBI_NO_STDIO
1547
unsigned char *stbi_jpeg_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
1548
{
1549
   jpeg j;
1550
   start_file(&j.s, f);
1551
   return load_jpeg_image(&j, x,y,comp,req_comp);
1552
}
1553
 
1554
// Load a JPEG from the named file. Returns NULL if the file cannot be
// opened or cannot be decoded; the file is closed again in every case.
unsigned char *stbi_jpeg_load(char const *filename, int *x, int *y, int *comp, int req_comp)
{
   unsigned char *data = NULL;
   FILE *f = fopen(filename, "rb");

   if (f) {
      data = stbi_jpeg_load_from_file(f,x,y,comp,req_comp);
      fclose(f);
   }
   return data;
}
1563
#endif
1564
 
1565
// Load a JPEG held in the len bytes at buffer. Remaining arguments and
// the result are those of load_jpeg_image.
unsigned char *stbi_jpeg_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
{
   jpeg decoder;
   unsigned char *pixels;

   start_mem(&decoder.s, buffer,len);
   pixels = load_jpeg_image(&decoder, x,y,comp,req_comp);
   return pixels;
}
1571
 
1572
#ifndef STBI_NO_STDIO
1573
int stbi_jpeg_test_file(FILE *f)
1574
{
1575
   int n,r;
1576
   jpeg j;
1577
   n = ftell(f);
1578
   start_file(&j.s, f);
1579
   r = decode_jpeg_header(&j, SCAN_type);
1580
   fseek(f,n,SEEK_SET);
1581
   return r;
1582
}
1583
#endif
1584
 
1585
// Check whether the len bytes at buffer look like a JPEG (only the SOI
// marker is examined). Returns nonzero if they do, 0 otherwise.
int stbi_jpeg_test_memory(stbi_uc const *buffer, int len)
{
   jpeg probe;
   int is_jpeg;

   start_mem(&probe.s, buffer,len);
   is_jpeg = decode_jpeg_header(&probe, SCAN_type);
   return is_jpeg;
}
1591
 
1592
// @TODO:
1593
#ifndef STBI_NO_STDIO
1594
extern int      stbi_jpeg_info            (char const *filename,           int *x, int *y, int *comp);
1595
extern int      stbi_jpeg_info_from_file  (FILE *f,                  int *x, int *y, int *comp);
1596
#endif
1597
extern int      stbi_jpeg_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
1598
 
1599
// public domain zlib decode    v0.2  Sean Barrett 2006-11-18
1600
//    simple implementation
1601
//      - all input must be provided in an upfront buffer
1602
//      - all output is written to a single output buffer (can malloc/realloc)
1603
//    performance
1604
//      - fast huffman
1605
 
1606
// fast-way is faster to check than jpeg huffman, but slow way is slower
1607
#define ZFAST_BITS  9 // accelerate all cases in default tables
1608
#define ZFAST_MASK  ((1 << ZFAST_BITS) - 1)
1609
 
1610
// zlib-style huffman encoding
1611
// (jpegs packs from left, zlib from right, so can't share code)
1612
typedef struct
1613
{
1614
   uint16 fast[1 << ZFAST_BITS];
1615
   uint16 firstcode[16];
1616
   int maxcode[17];
1617
   uint16 firstsymbol[16];
1618
   uint8  size[288];
1619
   uint16 value[288];
1620
} zhuffman;
1621
 
1622
// Reverse the order of the low 16 bits of n; higher bits are discarded.
// Done by swapping progressively smaller halves: bytes, nibbles, bit
// pairs, then single bits.
__forceinline static int bitreverse16(int n)
{
  int r;
  r = ((n & 0xFF00) >>  8) | ((n & 0x00FF) << 8);
  r = ((r & 0xF0F0) >>  4) | ((r & 0x0F0F) << 4);
  r = ((r & 0xCCCC) >>  2) | ((r & 0x3333) << 2);
  r = ((r & 0xAAAA) >>  1) | ((r & 0x5555) << 1);
  return r;
}
1630
 
1631
// Reverse the low `bits` bits of v (bits must be at most 16).
__forceinline static int bit_reverse(int v, int bits)
{
   int flipped;
   assert(bits <= 16);
   // reverse all 16 bits, then shift away the ones that were not asked
   // for: e.g. for 11 bits, reverse and drop the low 5
   flipped = bitreverse16(v);
   return flipped >> (16-bits);
}
1638
 
1639
// Build the decoding tables in z from a list of code lengths.
//   sizelist  code length of each of the num symbols (0 = symbol unused)
// Returns 1 on success, 0 if the lengths do not describe a valid code.
static int zbuild_huffman(zhuffman *z, uint8 *sizelist, int num)
{
   int i,k=0;
   int code, next_code[16], sizes[17];

   // DEFLATE spec for generating codes
   memset(sizes, 0, sizeof(sizes));
   memset(z->fast, 255, sizeof(z->fast)); // every entry 0xffff = "not in fast table"
   for (i=0; i < num; ++i)
      ++sizes[sizelist[i]];
   sizes[0] = 0;
   // the lengths come straight from the compressed stream, so an
   // impossible count is a data error, not a programming error
   for (i=1; i < 16; ++i)
      if (sizes[i] > (1 << i)) return e("bad sizes","Corrupt PNG");
   code = 0;
   for (i=1; i < 16; ++i) {
      next_code[i] = code;
      z->firstcode[i] = (uint16) code;
      z->firstsymbol[i] = (uint16) k;
      code = (code + sizes[i]);
      if (sizes[i])
         if (code-1 >= (1 << i)) return e("bad codelengths","Corrupt PNG");
      z->maxcode[i] = code << (16-i); // preshift for inner loop
      code <<= 1;
      k += sizes[i];
   }
   z->maxcode[16] = 0x10000; // sentinel
   for (i=0; i < num; ++i) {
      int s = sizelist[i];
      if (s) {
         int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s];
         z->size[c] = (uint8)s;
         z->value[c] = (uint16)i;
         if (s <= ZFAST_BITS) {
            // codes are read LSB-first, so every fast-table slot whose low
            // s bits equal the reversed code maps to this symbol
            int slot = bit_reverse(next_code[s],s);
            while (slot < (1 << ZFAST_BITS)) {
               z->fast[slot] = (uint16) c;
               slot += (1 << s);
            }
         }
         ++next_code[s];
      }
   }
   return 1;
}
1683
 
1684
// zlib-from-memory implementation for PNG reading
1685
//    because PNG allows splitting the zlib stream arbitrarily,
1686
//    and it's annoying structurally to have PNG call ZLIB call PNG,
1687
//    we require PNG read all the IDATs and combine them into a single
1688
//    memory buffer
1689
 
1690
typedef struct
1691
{
1692
   uint8 *zbuffer, *zbuffer_end;
1693
   int num_bits;
1694
   uint32 code_buffer;
1695
 
1696
   char *zout;
1697
   char *zout_start;
1698
   char *zout_end;
1699
   int   z_expandable;
1700
 
1701
   zhuffman z_length, z_distance;
1702
} zbuf;
1703
 
1704
// Return the next input byte, or 0 once the input is exhausted.
__forceinline static int zget8(zbuf *z)
{
   if (z->zbuffer >= z->zbuffer_end) return 0;
   return *z->zbuffer++;
}
1709
 
1710
// Top up the bit accumulator one byte at a time until it holds more than
// 24 bits. New bytes are placed above the bits already present.
static void fill_bits(zbuf *z)
{
   do {
      assert(z->code_buffer < (1U << z->num_bits));
      // shift as unsigned: a byte >= 128 shifted left by 24 overflows a signed int
      z->code_buffer |= (uint32) zget8(z) << z->num_bits;
      z->num_bits += 8;
   } while (z->num_bits <= 24);
}
1718
 
1719
// Consume and return the next n bits of input (taken from the low end of
// the accumulator), refilling the accumulator first if it holds fewer than n.
__forceinline static unsigned int zreceive(zbuf *z, int n)
{
   unsigned int k;
   if (z->num_bits < n) fill_bits(z);
   k = z->code_buffer & ((1 << n) - 1);
   z->code_buffer >>= n;
   z->num_bits -= n;
   return k;
}
1728
 
1729
// Decode one symbol from the bit stream using table *z.
// Returns the symbol value, or -1 if the upcoming bits are not a valid code.
__forceinline static int zhuffman_decode(zbuf *a, zhuffman *z)
{
   int b,s,k;
   if (a->num_bits < 16) fill_bits(a);
   // fast path: codes of at most ZFAST_BITS bits resolve with one lookup
   b = z->fast[a->code_buffer & ZFAST_MASK];
   if (b < 0xffff) {
      s = z->size[b];
      a->code_buffer >>= s;
      a->num_bits -= s;
      return z->value[b];
   }

   // not resolved by fast table, so compute it the slow way
   // use jpeg approach, which requires MSbits at top
   k = bit_reverse(a->code_buffer, 16);
   for (s=ZFAST_BITS+1; ; ++s)
      if (k < z->maxcode[s])
         break;
   if (s == 16) return -1; // invalid code!
   // code size is s, so:
   b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s];
   // the bits come from the input, so a mismatch means corrupt data:
   // report it rather than asserting
   if (z->size[b] != s) return -1;
   a->code_buffer >>= s;
   a->num_bits -= s;
   return z->value[b];
}
1755
 
1756
// Grow the output buffer by doubling until there is room for n more bytes.
// Returns 1 on success. Returns the result of e() if the buffer is not
// expandable, the required size cannot be represented, or realloc fails;
// on realloc failure the old buffer is left in place.
static int expand(zbuf *z, int n)  // need to make room for n bytes
{
   char *q;
   int cur, limit;
   if (!z->z_expandable) return e("output buffer limit","Corrupt PNG");
   cur   = (int) (z->zout     - z->zout_start);
   limit = (int) (z->zout_end - z->zout_start);
   // cur + n must not overflow a signed int
   if (n < 0 || cur > 0x7fffffff - n) return e("outofmem", "Out of memory");
   // a zero-sized buffer would never grow by doubling
   if (limit <= 0) limit = 1;
   while (cur + n > limit) {
      // doubling again would overflow a signed int
      if (limit > 0x3fffffff) return e("outofmem", "Out of memory");
      limit *= 2;
   }
   q = (char *) realloc(z->zout_start, limit);
   if (q == NULL) return e("outofmem", "Out of memory");
   z->zout_start = q;
   z->zout       = q + cur;
   z->zout_end   = q + limit;
   return 1;
}
1772
 
1773
// Tables indexed by (length symbol - 257) and by distance symbol:
// the base value, and how many extra bits follow to be added to it.
// The trailing zero entries correspond to symbols with no assigned value.
static int length_base[31] = {
   3,4,5,6,7,8,9,10,11,13,
   15,17,19,23,27,31,35,43,51,59,
   67,83,99,115,131,163,195,227,258,0,0 };

static int length_extra[31]=
{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 };

static int dist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,
257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0};

static int dist_extra[32] =
{ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13,0,0};
1786
 
1787
// Decode one Huffman-compressed block into the output buffer using the
// tables in a->z_length / a->z_distance. Returns 1 when the end-of-block
// symbol (256) is reached; returns 0 (via e() or a failed expand()) on error.
static int parse_huffman_block(zbuf *a)
{
   for(;;) {
      int z = zhuffman_decode(a, &a->z_length);
      if (z < 256) {
         if (z < 0) return e("bad huffman code","Corrupt PNG"); // error in huffman codes
         if (a->zout >= a->zout_end) if (!expand(a, 1)) return 0;
         *a->zout++ = (char) z;
      } else {
         uint8 *p;
         int len,dist;
         if (z == 256) return 1;
         // 286 and 287 have no length assigned (their table entries are 0)
         if (z >= 286) return e("bad huffman code","Corrupt PNG");
         z -= 257;
         len = length_base[z];
         if (length_extra[z]) len += zreceive(a, length_extra[z]);
         z = zhuffman_decode(a, &a->z_distance);
         // 30 and 31 have no distance assigned; a distance of 0 would copy
         // bytes that have not been written yet
         if (z < 0 || z >= 30) return e("bad huffman code","Corrupt PNG");
         dist = dist_base[z];
         if (dist_extra[z]) dist += zreceive(a, dist_extra[z]);
         if (a->zout - a->zout_start < dist) return e("bad dist","Corrupt PNG");
         // compare sizes rather than forming a pointer past the buffer end
         if (len > a->zout_end - a->zout) if (!expand(a, len)) return 0;
         // computed after expand(), which may have moved the buffer
         p = (uint8 *) (a->zout - dist);
         // byte-by-byte on purpose: source and destination may overlap
         while (len--)
            *a->zout++ = *p++;
      }
   }
}
1814
 
1815
// Read the code-length description of a dynamic-Huffman block and build
// a->z_length and a->z_distance from it. Returns 1 on success, 0 on error.
static int compute_huffman_codes(zbuf *a)
{
   static uint8 length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 };
   zhuffman z_codelength; // local rather than static so concurrent decodes do not share it
   uint8 lencodes[286+32+137];//padding for maximum single op
   uint8 codelength_sizes[19];
   int i,n,ntot;

   int hlit  = zreceive(a,5) + 257;
   int hdist = zreceive(a,5) + 1;
   int hclen = zreceive(a,4) + 4;
   ntot = hlit + hdist;

   // lengths of the code-length alphabet arrive in a permuted order
   memset(codelength_sizes, 0, sizeof(codelength_sizes));
   for (i=0; i < hclen; ++i) {
      int s = zreceive(a,3);
      codelength_sizes[length_dezigzag[i]] = (uint8) s;
   }
   if (!zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0;

   n = 0;
   while (n < ntot) {
      int c = zhuffman_decode(a, &z_codelength);
      // decode failures come from the data, so fail instead of asserting
      if (c < 0 || c >= 19) return e("bad codelengths","Corrupt PNG");
      if (c < 16)
         lencodes[n++] = (uint8) c;
      else {
         uint8 fill = 0;
         if (c == 16) {
            // repeat the previous length 3..6 times
            c = zreceive(a,2)+3;
            if (n == 0) return e("bad codelengths","Corrupt PNG"); // nothing to repeat
            fill = lencodes[n-1];
         } else if (c == 17) {
            // 3..10 zeros
            c = zreceive(a,3)+3;
         } else {
            // c == 18: 11..138 zeros
            c = zreceive(a,7)+11;
         }
         // a run may not extend past the declared total; this also keeps
         // the write inside lencodes[]
         if (ntot - n < c) return e("bad codelengths","Corrupt PNG");
         memset(lencodes+n, fill, c);
         n += c;
      }
   }
   if (n != ntot) return e("bad codelengths","Corrupt PNG");
   if (!zbuild_huffman(&a->z_length, lencodes, hlit)) return 0;
   if (!zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0;
   return 1;
}
1860
 
1861
// Copy one stored (uncompressed) block to the output: skip to the next byte
// boundary, read the 4-byte LEN/NLEN header, validate it, then copy LEN
// bytes. Returns 1 on success, 0 on error.
static int parse_uncompressed_block(zbuf *a)
{
   uint8 header[4];
   int len,nlen,k;
   if (a->num_bits & 7)
      zreceive(a, a->num_bits & 7); // discard
   // drain the bit-packed data into header
   k = 0;
   while (a->num_bits > 0 && k < 4) {
      header[k++] = (uint8) (a->code_buffer & 255);
      a->code_buffer >>= 8;
      a->num_bits -= 8;
   }
   assert(a->num_bits == 0);
   // now fill header the normal way
   while (k < 4)
      header[k++] = (uint8) zget8(a);
   len  = header[1] * 256 + header[0];
   nlen = header[3] * 256 + header[2];
   if (nlen != (len ^ 0xffff)) return e("zlib corrupt","Corrupt PNG");
   // compare against the remaining sizes instead of forming pointers that
   // may lie beyond the end of the buffers
   if (len > a->zbuffer_end - a->zbuffer) return e("read past buffer","Corrupt PNG");
   if (len > a->zout_end - a->zout)
      if (!expand(a, len)) return 0;
   memcpy(a->zout, a->zbuffer, len);
   a->zbuffer += len;
   a->zout += len;
   return 1;
}
1889
 
1890
// Read and validate the two-byte zlib stream header.
// Returns 1 if acceptable, otherwise the result of e().
static int parse_zlib_header(zbuf *a)
{
   int cmf   = zget8(a);
   int cm    = cmf & 15;
   /* int cinfo = cmf >> 4; */
   int flg   = zget8(a);
   if ((cmf*256+flg) % 31 != 0) return e("bad zlib header","Corrupt PNG"); // zlib spec
   if (flg & 32) return e("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png
   if (cm != 8) return e("bad compression","Corrupt PNG"); // DEFLATE required for png
   // window = 1 << (8 + cinfo)... but who cares, we fully buffer output
   return 1;
}
1902
 
1903
// @TODO: should statically initialize these for optimal thread safety
1904
static uint8 default_length[288], default_distance[32];
1905
static void init_defaults(void)
1906
{
1907
   int i;   // use <= to match clearly with spec
1908
   for (i=0; i <= 143; ++i)     default_length[i]   = 8;
1909
   for (   ; i <= 255; ++i)     default_length[i]   = 9;
1910
   for (   ; i <= 279; ++i)     default_length[i]   = 7;
1911
   for (   ; i <= 287; ++i)     default_length[i]   = 8;
1912
 
1913
   for (i=0; i <=  31; ++i)     default_distance[i] = 5;
1914
}
1915
 
1916
// Inflate a whole stream: optionally validate the zlib header, then decode
// blocks until one carries the "final" flag. Returns 1 on success, 0 on error.
static int parse_zlib(zbuf *a, int parse_header)
{
   int final, type;
   if (parse_header)
      if (!parse_zlib_header(a)) return 0;
   a->num_bits = 0;
   a->code_buffer = 0;
   do {
      final = zreceive(a,1);
      type = zreceive(a,2);
      if (type == 0) {
         if (!parse_uncompressed_block(a)) return 0;
      } else if (type == 3) {
         // not a valid block type; record a reason like the other failures do
         return e("bad block type","Corrupt PNG");
      } else {
         if (type == 1) {
            // use fixed code lengths
            // default_distance[31] stays 0 until init_defaults() has run
            if (!default_distance[31]) init_defaults();
            if (!zbuild_huffman(&a->z_length  , default_length  , 288)) return 0;
            if (!zbuild_huffman(&a->z_distance, default_distance,  32)) return 0;
         } else {
            if (!compute_huffman_codes(a)) return 0;
         }
         if (!parse_huffman_block(a)) return 0;
      }
   } while (!final);
   return 1;
}
1944
 
1945
// Point the decoder at output buffer obuf (olen bytes; growable when exp is
// nonzero) and run parse_zlib(). The caller must already have set
// a->zbuffer and a->zbuffer_end. Returns parse_zlib()'s result.
static int do_zlib(zbuf *a, char *obuf, int olen, int exp, int parse_header)
{
   a->zout_start = obuf;
   a->zout       = obuf;
   a->zout_end   = obuf + olen;
   a->z_expandable = exp;

   return parse_zlib(a, parse_header);
}
1954
 
1955
char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen)
1956
{
1957
   zbuf a;
1958
   char *p = (char *) malloc(initial_size);
1959
   if (p == NULL) return NULL;
1960
   a.zbuffer = (uint8 *) buffer;
1961
   a.zbuffer_end = (uint8 *) buffer + len;
1962
   if (do_zlib(&a, p, initial_size, 1, 1)) {
1963
      if (outlen) *outlen = (int) (a.zout - a.zout_start);
1964
      return a.zout_start;
1965
   } else {
1966
      free(a.zout_start);
1967
      return NULL;
1968
   }
1969
}
1970
 
1971
// Same as stbi_zlib_decode_malloc_guesssize() with a 16384-byte initial buffer.
char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen)
{
   return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen);
}
1975
 
1976
int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen)
1977
{
1978
   zbuf a;
1979
   a.zbuffer = (uint8 *) ibuffer;
1980
   a.zbuffer_end = (uint8 *) ibuffer + ilen;
1981
   if (do_zlib(&a, obuffer, olen, 0, 1))
1982
      return (int) (a.zout - a.zout_start);
1983
   else
1984
      return -1;
1985
}
1986
 
1987
char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen)
1988
{
1989
   zbuf a;
1990
   char *p = (char *) malloc(16384);
1991
   if (p == NULL) return NULL;
1992
   a.zbuffer = (uint8 *) buffer;
1993
   a.zbuffer_end = (uint8 *) buffer+len;
1994
   if (do_zlib(&a, p, 16384, 1, 0)) {
1995
      if (outlen) *outlen = (int) (a.zout - a.zout_start);
1996
      return a.zout_start;
1997
   } else {
1998
      free(a.zout_start);
1999
      return NULL;
2000
   }
2001
}
2002
 
2003
int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
2004
{
2005
   zbuf a;
2006
   a.zbuffer = (uint8 *) ibuffer;
2007
   a.zbuffer_end = (uint8 *) ibuffer + ilen;
2008
   if (do_zlib(&a, obuffer, olen, 0, 0))
2009
      return (int) (a.zout - a.zout_start);
2010
   else
2011
      return -1;
2012
}
2013
 
2014
// public domain "baseline" PNG decoder   v0.10  Sean Barrett 2006-11-18
2015
//    simple implementation
2016
//      - only 8-bit samples
2017
//      - no CRC checking
2018
//      - allocates lots of intermediate memory
2019
//        - avoids problem of streaming data between subsystems
2020
//        - avoids explicit window management
2021
//    performance
2022
//      - uses stb_zlib, a PD zlib implementation with fast huffman decoding
2023
 
2024
 
2025
typedef struct
2026
{
2027
   uint32 length;
2028
   uint32 type;
2029
} chunk;
2030
 
2031
// Pack four bytes into one 32-bit value, first byte most significant.
// Shifts are done as unsigned so a first byte >= 128 does not overflow int.
#define PNG_TYPE(a,b,c,d)  (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d))
2032
 
2033
// Read the next chunk header: two consecutive get32() values,
// length first and then type.
static chunk get_chunk_header(stbi *s)
{
   chunk c;
   c.length = get32(s);
   c.type   = get32(s);
   return c;
}
2040
 
2041
// Consume 8 bytes and compare them with the PNG signature.
// Returns 1 on a match, otherwise the result of e().
static int check_png_header(stbi *s)
{
   static uint8 png_sig[8] = { 137,80,78,71,13,10,26,10 };
   int i;
   for (i=0; i < 8; ++i)
      if (get8(s) != png_sig[i]) return e("bad png sig","Not a PNG");
   return 1;
}
2049
 
2050
typedef struct
2051
{
2052
   stbi s;
2053
   uint8 *idata, *expanded, *out;
2054
} png;
2055
 
2056
 
2057
enum {
2058
   F_none=0, F_sub=1, F_up=2, F_avg=3, F_paeth=4,
2059
   F_avg_first, F_paeth_first,
2060
};
2061
 
2062
static uint8 first_row_filter[5] =
2063
{
2064
   F_none, F_sub, F_none, F_avg_first, F_paeth_first
2065
};
2066
 
2067
// Paeth predictor: of a, b and c, return the one closest to a + b - c.
// Ties are resolved in the order a, then b, then c.
static int paeth(int a, int b, int c)
{
   int p = a + b - c;
   int pa = abs(p-a);
   int pb = abs(p-b);
   int pc = abs(p-c);
   if (pa <= pb && pa <= pc) return a;
   if (pb <= pc) return b;
   return c;
}
2077
 
2078
// create the png data from post-deflated data
2079
static int create_png_image(png *a, uint8 *raw, uint32 raw_len, int out_n)
2080
{
2081
   stbi *s = &a->s;
2082
   uint32 i,j,stride = s->img_x*out_n;
2083
   int k;
2084
   int img_n = s->img_n; // copy it into a local for later
2085
   assert(out_n == s->img_n || out_n == s->img_n+1);
2086
   a->out = (uint8 *) malloc(s->img_x * s->img_y * out_n);
2087
   if (!a->out) return e("outofmem", "Out of memory");
2088
   if (raw_len != (img_n * s->img_x + 1) * s->img_y) return e("not enough pixels","Corrupt PNG");
2089
   for (j=0; j < s->img_y; ++j) {
2090
      uint8 *cur = a->out + stride*j;
2091
      uint8 *prior = cur - stride;
2092
      int filter = *raw++;
2093
      if (filter > 4) return e("invalid filter","Corrupt PNG");
2094
      // if first row, use special filter that doesn't sample previous row
2095
      if (j == 0) filter = first_row_filter[filter];
2096
      // handle first pixel explicitly
2097
      for (k=0; k < img_n; ++k) {
2098
         switch(filter) {
2099
            case F_none       : cur[k] = raw[k]; break;
2100
            case F_sub        : cur[k] = raw[k]; break;
2101
            case F_up         : cur[k] = raw[k] + prior[k]; break;
2102
            case F_avg        : cur[k] = raw[k] + (prior[k]>>1); break;
2103
            case F_paeth      : cur[k] = (uint8) (raw[k] + paeth(0,prior[k],0)); break;
2104
            case F_avg_first  : cur[k] = raw[k]; break;
2105
            case F_paeth_first: cur[k] = raw[k]; break;
2106
         }
2107
      }
2108
      if (img_n != out_n) cur[img_n] = 255;
2109
      raw += img_n;
2110
      cur += out_n;
2111
      prior += out_n;
2112
      // this is a little gross, so that we don't switch per-pixel or per-component
2113
      if (img_n == out_n) {
2114
         #define CASE(f) \
2115
             case f:     \
2116
                for (i=s->img_x-1; i >= 1; --i, raw+=img_n,cur+=img_n,prior+=img_n) \
2117
                   for (k=0; k < img_n; ++k)
2118
         switch(filter) {
2119
            CASE(F_none)  cur[k] = raw[k]; break;
2120
            CASE(F_sub)   cur[k] = raw[k] + cur[k-img_n]; break;
2121
            CASE(F_up)    cur[k] = raw[k] + prior[k]; break;
2122
            CASE(F_avg)   cur[k] = raw[k] + ((prior[k] + cur[k-img_n])>>1); break;
2123
            CASE(F_paeth)  cur[k] = (uint8) (raw[k] + paeth(cur[k-img_n],prior[k],prior[k-img_n])); break;
2124
            CASE(F_avg_first)    cur[k] = raw[k] + (cur[k-img_n] >> 1); break;
2125
            CASE(F_paeth_first)  cur[k] = (uint8) (raw[k] + paeth(cur[k-img_n],0,0)); break;
2126
         }
2127
         #undef CASE
2128
      } else {
2129
         assert(img_n+1 == out_n);
2130
         #define CASE(f) \
2131
             case f:     \
2132
                for (i=s->img_x-1; i >= 1; --i, cur[img_n]=255,raw+=img_n,cur+=out_n,prior+=out_n) \
2133
                   for (k=0; k < img_n; ++k)
2134
         switch(filter) {
2135
            CASE(F_none)  cur[k] = raw[k]; break;
2136
            CASE(F_sub)   cur[k] = raw[k] + cur[k-out_n]; break;
2137
            CASE(F_up)    cur[k] = raw[k] + prior[k]; break;
2138
            CASE(F_avg)   cur[k] = raw[k] + ((prior[k] + cur[k-out_n])>>1); break;
2139
            CASE(F_paeth)  cur[k] = (uint8) (raw[k] + paeth(cur[k-out_n],prior[k],prior[k-out_n])); break;
2140
            CASE(F_avg_first)    cur[k] = raw[k] + (cur[k-out_n] >> 1); break;
2141
            CASE(F_paeth_first)  cur[k] = (uint8) (raw[k] + paeth(cur[k-out_n],0,0)); break;
2142
         }
2143
         #undef CASE
2144
      }
2145
   }
2146
   return 1;
2147
}
2148
 
2149
// Apply color-key transparency to z->out in place: a pixel whose color
// components equal tc[] gets alpha 0. out_n must be 2 or 4. Always returns 1.
static int compute_transparency(png *z, uint8 tc[3], int out_n)
{
   stbi *s = &z->s;
   uint32 i, pixel_count = s->img_x * s->img_y;
   uint8 *p = z->out;

   // compute color-based transparency, assuming we've
   // already got 255 as the alpha value in the output
   assert(out_n == 2 || out_n == 4);

   if (out_n == 2) {
      // grey + alpha: alpha is written for every pixel
      for (i=0; i < pixel_count; ++i) {
         p[1] = (p[0] == tc[0] ? 0 : 255);
         p += 2;
      }
   } else {
      // rgb + alpha: only matching pixels are touched
      for (i=0; i < pixel_count; ++i) {
         if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
            p[3] = 0;
         p += 4;
      }
   }
   return 1;
}
2173
 
2174
// Replace a->out (one palette index per pixel) with a newly allocated
// buffer of pal_img_n components per pixel, looked up in palette[], which
// holds 4 bytes per entry. pal_img_n == 3 copies three bytes per pixel;
// any other value copies four. The old buffer is freed.
// Returns 1 on success, or the result of e() if allocation fails.
static int expand_palette(png *a, uint8 *palette, int len, int pal_img_n)
{
   uint32 i, pixel_count = a->s.img_x * a->s.img_y;
   uint8 *p, *temp_out, *orig = a->out;

   (void) len;   // not used: indices are not checked against the palette length

   p = (uint8 *) malloc(pixel_count * pal_img_n);
   if (p == NULL) return e("outofmem", "Out of memory");

   // between here and free(out) below, exiting would leak
   temp_out = p;

   if (pal_img_n == 3) {
      for (i=0; i < pixel_count; ++i) {
         int n = orig[i]*4;
         p[0] = palette[n  ];
         p[1] = palette[n+1];
         p[2] = palette[n+2];
         p += 3;
      }
   } else {
      for (i=0; i < pixel_count; ++i) {
         int n = orig[i]*4;
         p[0] = palette[n  ];
         p[1] = palette[n+1];
         p[2] = palette[n+2];
         p[3] = palette[n+3];
         p += 4;
      }
   }
   free(a->out);
   a->out = temp_out;
   return 1;
}
2207
 
2208
// Walk the chunks of a PNG stream.
//   scan == SCAN_type   : only check the signature
//   scan == SCAN_header : stop as soon as size and component count are known
//   scan == SCAN_load   : decode the image into z->out
// req_comp is the component count requested by the caller (0 = as stored).
// Returns 1 on success, otherwise 0 (normally via e()).
static int parse_png_file(png *z, int scan, int req_comp)
{
   uint8 palette[1024], pal_img_n=0;
   uint8 has_trans=0, tc[3];
   uint32 ioff=0, idata_limit=0, i, pal_len=0;
   int first=1,k;
   stbi *s = &z->s;

   if (!check_png_header(s)) return 0;

   if (scan == SCAN_type) return 1;

   for(;;first=0) {
      chunk c = get_chunk_header(s);
      if (first && c.type != PNG_TYPE('I','H','D','R'))
         return e("first not IHDR","Corrupt PNG");
      switch (c.type) {
         case PNG_TYPE('I','H','D','R'): {
            int depth,color,interlace,comp,filter;
            if (!first) return e("multiple IHDR","Corrupt PNG");
            if (c.length != 13) return e("bad IHDR len","Corrupt PNG");
            s->img_x = get32(s); if (s->img_x > (1 << 24)) return e("too large","Very large image (corrupt?)");
            s->img_y = get32(s); if (s->img_y > (1 << 24)) return e("too large","Very large image (corrupt?)");
            depth = get8(s);  if (depth != 8)        return e("8bit only","PNG not supported: 8-bit only");
            color = get8(s);  if (color > 6)         return e("bad ctype","Corrupt PNG");
            if (color == 3) pal_img_n = 3; else if (color & 1) return e("bad ctype","Corrupt PNG");
            comp  = get8(s);  if (comp) return e("bad comp method","Corrupt PNG");
            filter= get8(s);  if (filter) return e("bad filter method","Corrupt PNG");
            interlace = get8(s); if (interlace) return e("interlaced","PNG not supported: interlaced mode");
            if (!s->img_x || !s->img_y) return e("0-pixel image","Corrupt PNG");
            if (!pal_img_n) {
               s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
               if ((1 << 30) / s->img_x / s->img_n < s->img_y) return e("too large", "Image too large to decode");
               if (scan == SCAN_header) return 1;
            } else {
               // if paletted, then pal_n is our final components, and
               // img_n is # components to decompress/filter.
               s->img_n = 1;
               if ((1 << 30) / s->img_x / 4 < s->img_y) return e("too large","Corrupt PNG");
               // if SCAN_header, have to scan to see if we have a tRNS
            }
            break;
         }

         case PNG_TYPE('P','L','T','E'):  {
            if (c.length > 256*3) return e("invalid PLTE","Corrupt PNG");
            pal_len = c.length / 3;
            if (pal_len * 3 != c.length) return e("invalid PLTE","Corrupt PNG");
            // stored as 4 bytes per entry with alpha defaulting to opaque
            for (i=0; i < pal_len; ++i) {
               palette[i*4+0] = get8u(s);
               palette[i*4+1] = get8u(s);
               palette[i*4+2] = get8u(s);
               palette[i*4+3] = 255;
            }
            break;
         }

         case PNG_TYPE('t','R','N','S'): {
            if (z->idata) return e("tRNS after IDAT","Corrupt PNG");
            if (pal_img_n) {
               // paletted: one alpha byte per palette entry
               if (scan == SCAN_header) { s->img_n = 4; return 1; }
               if (pal_len == 0) return e("tRNS before PLTE","Corrupt PNG");
               if (c.length > pal_len) return e("bad tRNS len","Corrupt PNG");
               pal_img_n = 4;
               for (i=0; i < c.length; ++i)
                  palette[i*4+3] = get8u(s);
            } else {
               // non-paletted: a single transparent color
               if (!(s->img_n & 1)) return e("tRNS with alpha","Corrupt PNG");
               if (c.length != (uint32) s->img_n*2) return e("bad tRNS len","Corrupt PNG");
               has_trans = 1;
               for (k=0; k < s->img_n; ++k)
                  tc[k] = (uint8) get16(s); // non 8-bit images will be larger
            }
            break;
         }

         case PNG_TYPE('I','D','A','T'): {
            if (pal_img_n && !pal_len) return e("no PLTE","Corrupt PNG");
            if (scan == SCAN_header) { s->img_n = pal_img_n; return 1; }
            // cap the accumulated size so that ioff + c.length and the
            // doubling of idata_limit below cannot wrap a 32-bit value
            if (c.length > (1u << 30) || ioff > (1u << 30) - c.length)
               return e("IDAT size limit","IDAT data larger than 2^30 bytes");
            if (ioff + c.length > idata_limit) {
               uint8 *p;
               if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096;
               while (ioff + c.length > idata_limit)
                  idata_limit *= 2;
               p = (uint8 *) realloc(z->idata, idata_limit); if (p == NULL) return e("outofmem", "Out of memory");
               z->idata = p;
            }
            #ifndef STBI_NO_STDIO
            if (s->img_file)
            {
               if (fread(z->idata+ioff,1,c.length,s->img_file) != c.length) return e("outofdata","Corrupt PNG");
            }
            else
            #endif
            {
               // never copy more than the memory source still holds
               // NOTE(review): relies on s->img_buffer_end marking the end of
               // the memory buffer; the stbi definition is outside this section
               if (s->img_buffer > s->img_buffer_end ||
                   (uint32) (s->img_buffer_end - s->img_buffer) < c.length)
                  return e("outofdata","Corrupt PNG");
               memcpy(z->idata+ioff, s->img_buffer, c.length);
               s->img_buffer += c.length;
            }
            ioff += c.length;
            break;
         }

         case PNG_TYPE('I','E','N','D'): {
            uint32 raw_len;
            if (scan != SCAN_load) return 1;
            if (z->idata == NULL) return e("no IDAT","Corrupt PNG");
            z->expanded = (uint8 *) stbi_zlib_decode_malloc((char *) z->idata, ioff, (int *) &raw_len);
            if (z->expanded == NULL) return 0; // zlib should set error
            free(z->idata); z->idata = NULL;
            // add an alpha component while unfiltering if the caller asked
            // for exactly one more component (and not 3), or if a tRNS color
            // key needs somewhere to put its result
            if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans)
               s->img_out_n = s->img_n+1;
            else
               s->img_out_n = s->img_n;
            if (!create_png_image(z, z->expanded, raw_len, s->img_out_n)) return 0;
            if (has_trans)
               if (!compute_transparency(z, tc, s->img_out_n)) return 0;
            if (pal_img_n) {
               // pal_img_n == 3 or 4
               s->img_n = pal_img_n; // record the actual colors we had
               s->img_out_n = pal_img_n;
               if (req_comp >= 3) s->img_out_n = req_comp;
               if (!expand_palette(z, palette, pal_len, s->img_out_n))
                  return 0;
            }
            free(z->expanded); z->expanded = NULL;
            return 1;
         }

         default:
            // if critical, fail
            if ((c.type & (1 << 29)) == 0) {
               #ifndef STBI_NO_FAILURE_STRINGS
               // not threadsafe
               static char invalid_chunk[] = "XXXX chunk not known";
               invalid_chunk[0] = (uint8) (c.type >> 24);
               invalid_chunk[1] = (uint8) (c.type >> 16);
               invalid_chunk[2] = (uint8) (c.type >>  8);
               invalid_chunk[3] = (uint8) (c.type >>  0);
               #endif
               return e(invalid_chunk, "PNG not supported: unknown chunk type");
            }
            skip(s, c.length);
            break;
      }
      // end of chunk, read and skip CRC
      get32(s);
   }
}
2356
 
2357
// Decode a PNG from the source already attached to p->s.
// On success returns the pixel buffer (converted to req_comp components
// when req_comp is nonzero), stores width and height in *x and *y, and
// stores p->s.img_n in *n when n is non-NULL. Returns NULL on failure.
// Intermediate buffers are freed before returning.
static unsigned char *do_png(png *p, int *x, int *y, int *n, int req_comp)
{
   unsigned char *result=NULL;
   p->expanded = NULL;
   p->idata = NULL;
   p->out = NULL;
   if (req_comp < 0 || req_comp > 4) return epuc("bad req_comp", "Internal error");
   if (parse_png_file(p, SCAN_load, req_comp)) {
      // take ownership of the decoded pixels so the cleanup below skips them
      result = p->out;
      p->out = NULL;
      if (req_comp && req_comp != p->s.img_out_n) {
         result = convert_format(result, p->s.img_out_n, req_comp, p->s.img_x, p->s.img_y);
         p->s.img_out_n = req_comp;
         if (result == NULL) return result;
      }
      *x = p->s.img_x;
      *y = p->s.img_y;
      if (n) *n = p->s.img_n;
   }
   free(p->out);      p->out      = NULL;
   free(p->expanded); p->expanded = NULL;
   free(p->idata);    p->idata    = NULL;

   return result;
}
2382
 
2383
#ifndef STBI_NO_STDIO
2384
unsigned char *stbi_png_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
2385
{
2386
   png p;
2387
   start_file(&p.s, f);
2388
   return do_png(&p, x,y,comp,req_comp);
2389
}
2390
 
2391
// Open filename in binary mode, decode it as a PNG and close it again.
// Returns NULL if the file cannot be opened or decoding fails.
unsigned char *stbi_png_load(char const *filename, int *x, int *y, int *comp, int req_comp)
{
   unsigned char *data;
   FILE *f = fopen(filename, "rb");
   if (!f) return NULL;
   data = stbi_png_load_from_file(f,x,y,comp,req_comp);
   fclose(f);
   return data;
}
2400
#endif
2401
 
2402
// Decode a PNG held in a memory buffer of len bytes; see do_png() for the outputs.
unsigned char *stbi_png_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
{
   png p;
   start_mem(&p.s, buffer,len);
   return do_png(&p, x,y,comp,req_comp);
}
2408
 
2409
#ifndef STBI_NO_STDIO
2410
int stbi_png_test_file(FILE *f)
2411
{
2412
   png p;
2413
   int n,r;
2414
   n = ftell(f);
2415
   start_file(&p.s, f);
2416
   r = parse_png_file(&p, SCAN_type,STBI_default);
2417
   fseek(f,n,SEEK_SET);
2418
   return r;
2419
}
2420
#endif
2421
 
2422
// Return nonzero if the memory buffer begins with the PNG signature.
int stbi_png_test_memory(stbi_uc const *buffer, int len)
{
   png p;
   start_mem(&p.s, buffer, len);
   return parse_png_file(&p, SCAN_type,STBI_default);
}
2428
 
2429
// TODO: load header from png
2430
#ifndef STBI_NO_STDIO
2431
extern int      stbi_png_info             (char const *filename,           int *x, int *y, int *comp);
2432
extern int      stbi_png_info_from_file   (FILE *f,                  int *x, int *y, int *comp);
2433
#endif
2434
extern int      stbi_png_info_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp);
2435
 
2436
// Microsoft/Windows BMP image
2437
 
2438
// Return 1 if the stream starts with "BM" and, after the rest of the file
// header, carries one of the supported info-header sizes (12, 40, 56, 108);
// otherwise return 0. Consumes input from s.
static int bmp_test(stbi *s)
{
   int sz;
   if (get8(s) != 'B') return 0;
   if (get8(s) != 'M') return 0;
   get32le(s); // discard filesize
   get16le(s); // discard reserved
   get16le(s); // discard reserved
   get32le(s); // discard data offset
   sz = get32le(s);
   if (sz == 12 || sz == 40 || sz == 56 || sz == 108) return 1;
   return 0;
}
2451
 
2452
#ifndef STBI_NO_STDIO
2453
int      stbi_bmp_test_file        (FILE *f)
2454
{
2455
   stbi s;
2456
   int r,n = ftell(f);
2457
   start_file(&s,f);
2458
   r = bmp_test(&s);
2459
   fseek(f,n,SEEK_SET);
2460
   return r;
2461
}
2462
#endif
2463
 
2464
// Run bmp_test() on a memory buffer of len bytes.
int      stbi_bmp_test_memory      (stbi_uc const *buffer, int len)
{
   stbi s;
   start_mem(&s, buffer, len);
   return bmp_test(&s);
}
2470
 
2471
// returns 0..31 for the highest set bit, or -1 if z is 0
// (binary search: halve the candidate range at each step)
static int high_bit(unsigned int z)
{
   int n=0;
   if (z == 0) return -1;
   if (z >= 0x10000) n += 16, z >>= 16;
   if (z >= 0x00100) n +=  8, z >>=  8;
   if (z >= 0x00010) n +=  4, z >>=  4;
   if (z >= 0x00004) n +=  2, z >>=  2;
   if (z >= 0x00002) n +=  1, z >>=  1;
   return n;
}
2483
 
2484
// Return the number of set bits in a (0..32), summing adjacent bit fields
// of doubling width in parallel.
static int bitcount(unsigned int a)
{
   a = (a & 0x55555555) + ((a >>  1) & 0x55555555); // max 2
   a = (a & 0x33333333) + ((a >>  2) & 0x33333333); // max 4
   a = (a + (a >> 4)) & 0x0f0f0f0f; // max 8 per 4, now 8 bits
   a = (a + (a >> 8)); // max 16 per 8 bits
   a = (a + (a >> 16)); // max 32 per 8 bits
   return a & 0xff;
}
2493
 
2494
// Scale a masked channel value to 8 bits.
//   v     : the value, already masked to one channel
//   shift : right-shift amount that brings the channel's top bit to bit 7;
//           negative means shift left by -shift
//   bits  : width of the channel in bits
// After shifting, the value is added to copies of itself shifted right by
// bits, 2*bits, ... (while the shift is below 8), so a channel narrower than
// 8 bits fills the whole range (e.g. 5 bits all set -> 255).
// The arithmetic is done unsigned: a value with bit 31 set would otherwise
// be sign-extended by the right shifts and corrupt the low bits.
static int shiftsigned(int v, int shift, int bits)
{
   unsigned int u = (unsigned int) v;
   unsigned int result;
   int z;

   if (shift < 0) u <<= -shift;
   else u >>= shift;
   result = u;

   // bits <= 0 would never advance z, so skip the replication entirely
   if (bits > 0) {
      for (z = bits; z < 8; z += bits)
         result += u >> z;
   }
   return (int) result;
}
2510
 
2511
// Decode a BMP from s into a newly malloc'd buffer of 8-bit channels.
//   x, y     - receive the image width and height
//   comp     - if non-NULL, receives the channel count the pixels were
//              decoded at (req_comp when that is 3 or 4, otherwise 3, or 4
//              when the header supplies an alpha mask)
//   req_comp - 0 to keep the decoded channel count, else the desired count;
//              1 and 2 are produced by convert_format() after decoding
// Handles 4/8-bit paletted, 16/24/32-bit images with header sizes 12, 40, 56
// and 108; rejects 1-bit and RLE data. Failures return NULL directly or the
// result of epuc().
static stbi_uc *bmp_load(stbi *s, int *x, int *y, int *comp, int req_comp)
{
   uint8 *out;
   unsigned int mr=0,mg=0,mb=0,ma=0;
   stbi_uc pal[256][4];
   int psize=0,i,j,compress=0,width;
   int bpp, flip_vertically, pad, target, offset, hsz;
   if (get8(s) != 'B' || get8(s) != 'M') return epuc("not BMP", "Corrupt BMP");
   get32le(s); // discard filesize
   get16le(s); // discard reserved
   get16le(s); // discard reserved
   offset = get32le(s);
   hsz = get32le(s);
   if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108) return epuc("unknown BMP", "BMP type not supported: unknown");
   failure_reason = "bad BMP";
   if (hsz == 12) {
      s->img_x = get16le(s);
      s->img_y = get16le(s);
   } else {
      s->img_x = get32le(s);
      s->img_y = get32le(s);
   }
   if (get16le(s) != 1) return 0;
   bpp = get16le(s);
   if (bpp == 1) return epuc("monochrome", "BMP type not supported: 1-bit");
   // a positive height means the rows must be flipped after decoding
   flip_vertically = ((int) s->img_y) > 0;
   s->img_y = abs((int) s->img_y);
   if (hsz == 12) {
      if (bpp < 24)
         psize = (offset - 14 - 24) / 3;
   } else {
      compress = get32le(s);
      if (compress == 1 || compress == 2) return epuc("BMP RLE", "BMP type not supported: RLE");
      get32le(s); // discard sizeof
      get32le(s); // discard hres
      get32le(s); // discard vres
      get32le(s); // discard colorsused
      get32le(s); // discard max important
      if (hsz == 40 || hsz == 56) {
         if (hsz == 56) {
            get32le(s);
            get32le(s);
            get32le(s);
            get32le(s);
         }
         if (bpp == 16 || bpp == 32) {
            mr = mg = mb = 0;
            if (compress == 0) {
               // no masks in the file: use the default layout
               if (bpp == 32) {
                  mr = 0xff << 16;
                  mg = 0xff <<  8;
                  mb = 0xff <<  0;
               } else {
                  mr = 31 << 10;
                  mg = 31 <<  5;
                  mb = 31 <<  0;
               }
            } else if (compress == 3) {
               mr = get32le(s);
               mg = get32le(s);
               mb = get32le(s);
               // not documented, but generated by photoshop and handled by mspaint
               if (mr == mg && mg == mb) {
                  // ?!?!?
                  return NULL;
               }
            } else
               return NULL;
         }
      } else {
         assert(hsz == 108);
         mr = get32le(s);
         mg = get32le(s);
         mb = get32le(s);
         ma = get32le(s);
         get32le(s); // discard color space
         for (i=0; i < 12; ++i)
            get32le(s); // discard color space parameters
      }
      if (bpp < 16)
         psize = (offset - 14 - hsz) >> 2;
   }
   s->img_n = ma ? 4 : 3;
   if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
      target = req_comp;
   else
      target = s->img_n; // if they want monochrome, we'll post-convert
   out = (stbi_uc *) malloc(target * s->img_x * s->img_y);
   if (!out) return epuc("outofmem", "Out of memory");
   if (bpp < 16) {
      int z=0;
      if (psize == 0 || psize > 256) { free(out); return epuc("invalid", "Corrupt BMP"); }
      // palette entries are stored B,G,R (plus one extra byte unless hsz == 12)
      for (i=0; i < psize; ++i) {
         pal[i][2] = get8(s);
         pal[i][1] = get8(s);
         pal[i][0] = get8(s);
         if (hsz != 12) get8(s);
         pal[i][3] = 255;
      }
      skip(s, offset - 14 - hsz - psize * (hsz == 12 ? 3 : 4));
      if (bpp == 4) width = (s->img_x + 1) >> 1;
      else if (bpp == 8) width = s->img_x;
      else { free(out); return epuc("bad bpp", "Corrupt BMP"); }
      pad = (-width)&3; // rows are padded to a multiple of 4 bytes
      for (j=0; j < (int) s->img_y; ++j) {
         // two pixels per iteration: one byte holds both when bpp == 4
         for (i=0; i < (int) s->img_x; i += 2) {
            int v=get8(s),v2=0;
            if (bpp == 4) {
               v2 = v & 15;
               v >>= 4;
            }
            out[z++] = pal[v][0];
            out[z++] = pal[v][1];
            out[z++] = pal[v][2];
            if (target == 4) out[z++] = 255;
            if (i+1 == (int) s->img_x) break;
            v = (bpp == 8) ? get8(s) : v2;
            out[z++] = pal[v][0];
            out[z++] = pal[v][1];
            out[z++] = pal[v][2];
            if (target == 4) out[z++] = 255;
         }
         skip(s, pad);
      }
   } else {
      int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0;
      int z = 0;
      int easy=0;
      skip(s, offset - 14 - hsz);
      if (bpp == 24) width = 3 * s->img_x;
      else if (bpp == 16) width = 2*s->img_x;
      else /* bpp = 32 and pad = 0 */ width=0;
      pad = (-width) & 3;
      if (bpp == 24) {
         easy = 1;
      } else if (bpp == 32) {
         // byte-aligned B,G,R,A layout can be copied without mask arithmetic;
         // the red mask is 0x00ff0000 (0xff000000 is the alpha mask)
         if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000)
            easy = 2;
      }
      if (!easy) {
         if (!mr || !mg || !mb) { free(out); return epuc("bad masks", "Corrupt BMP"); }
         // right shift amt to put high bit in position #7;
         // each channel's width comes from its own mask
         rshift = high_bit(mr)-7; rcount = bitcount(mr);
         gshift = high_bit(mg)-7; gcount = bitcount(mg);
         bshift = high_bit(mb)-7; bcount = bitcount(mb);
         ashift = high_bit(ma)-7; acount = bitcount(ma);
      }
      for (j=0; j < (int) s->img_y; ++j) {
         if (easy) {
            for (i=0; i < (int) s->img_x; ++i) {
               int a;
               // file order is B,G,R; output order is R,G,B
               out[z+2] = get8(s);
               out[z+1] = get8(s);
               out[z+0] = get8(s);
               z += 3;
               a = (easy == 2 ? get8(s) : 255);
               if (target == 4) out[z++] = a;
            }
         } else {
            for (i=0; i < (int) s->img_x; ++i) {
               uint32 v = (bpp == 16 ? get16le(s) : get32le(s));
               int a;
               out[z++] = shiftsigned(v & mr, rshift, rcount);
               out[z++] = shiftsigned(v & mg, gshift, gcount);
               out[z++] = shiftsigned(v & mb, bshift, bcount);
               a = (ma ? shiftsigned(v & ma, ashift, acount) : 255);
               if (target == 4) out[z++] = a;
            }
         }
         skip(s, pad);
      }
   }
   if (flip_vertically) {
      stbi_uc t;
      for (j=0; j < (int) s->img_y>>1; ++j) {
         stbi_uc *p1 = out +      j     *s->img_x*target;
         stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target;
         for (i=0; i < (int) s->img_x*target; ++i) {
            t = p1[i], p1[i] = p2[i], p2[i] = t;
         }
      }
   }

   if (req_comp && req_comp != target) {
      out = convert_format(out, target, req_comp, s->img_x, s->img_y);
      if (out == NULL) return out; // convert_format frees input on failure
   }

   *x = s->img_x;
   *y = s->img_y;
   if (comp) *comp = target;
   return out;
}
2704
 
2705
#ifndef STBI_NO_STDIO
2706
// Open filename in binary mode and decode it with stbi_bmp_load_from_file().
// Returns that function's result, or NULL if the file cannot be opened.
stbi_uc *stbi_bmp_load(char const *filename, int *x, int *y, int *comp, int req_comp)
{
   stbi_uc *pixels = NULL;
   FILE *fp = fopen(filename, "rb");

   if (fp != NULL) {
      pixels = stbi_bmp_load_from_file(fp, x, y, comp, req_comp);
      fclose(fp);
   }
   return pixels;
}
2715
 
2716
stbi_uc *stbi_bmp_load_from_file   (FILE *f,                  int *x, int *y, int *comp, int req_comp)
2717
{
2718
   stbi s;
2719
   start_file(&s, f);
2720
   return bmp_load(&s, x,y,comp,req_comp);
2721
}
2722
#endif
2723
 
2724
// Decode a BMP held in a len-byte memory buffer; the remaining arguments are
// passed straight through to bmp_load().
stbi_uc *stbi_bmp_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
{
   stbi ctx;

   start_mem(&ctx, buffer, len);
   return bmp_load(&ctx, x, y, comp, req_comp);
}
2730
 
2731
// Targa Truevision - TGA
2732
// by Jonathan Dummer
2733
 
2734
// Read the start of the stream and report whether the header fields are
// plausible for a TGA this loader accepts. Returns 1 if every check passes,
// 0 at the first one that fails. Consumes bytes from s.
static int tga_test(stbi *s)
{
   int field;

   get8u(s);                         // offset, unused

   field = get8u(s);                 // color type: only RGB or indexed allowed
   if (field > 1)
      return 0;

   field = get8u(s);                 // image type: RGB or grey, with or without RLE
   switch (field) {
      case 1: case 2: case 3:
      case 9: case 10: case 11:
         break;
      default:
         return 0;
   }

   get16(s);                         // palette start, unused
   get16(s);                         // palette length, unused
   get8(s);                          // bits per palette color entry, unused
   get16(s);                         // x origin, unused
   get16(s);                         // y origin, unused

   if (get16(s) < 1)                 // width
      return 0;
   if (get16(s) < 1)                 // height
      return 0;

   field = get8(s);                  // bits per pixel
   if (field != 8 && field != 16 && field != 24 && field != 32)
      return 0;

   return 1;                         // passed every check
}
2753
 
2754
#ifndef STBI_NO_STDIO
2755
int      stbi_tga_test_file        (FILE *f)
2756
{
2757
   stbi s;
2758
   int r,n = ftell(f);
2759
   start_file(&s, f);
2760
   r = tga_test(&s);
2761
   fseek(f,n,SEEK_SET);
2762
   return r;
2763
}
2764
#endif
2765
 
2766
// Report whether the len-byte buffer begins with a header that tga_test()
// accepts. Returns the result of tga_test().
int stbi_tga_test_memory(stbi_uc const *buffer, int len)
{
   stbi ctx;

   start_mem(&ctx, buffer, len);
   return tga_test(&ctx);
}
2772
 
2773
// Decode a TGA image from s into a newly malloc'd buffer.
//   x, y     - receive the image width and height
//   comp     - if non-NULL, receives the file's bytes per pixel
//              (palette entry size for indexed images)
//   req_comp - desired channels per pixel (1..4); any other value means
//              "use the file's own count"
// Supports raw and RLE data at 8/16/24/32 bits per pixel, optionally
// indexed through a palette. Returns NULL on an unsupported header or when
// an allocation fails.
static stbi_uc *tga_load(stbi *s, int *x, int *y, int *comp, int req_comp)
{
   //   read in the TGA header stuff (the initializers run in declaration
   //   order, so the reads happen in this order)
   int tga_offset = get8u(s);
   int tga_indexed = get8u(s);
   int tga_image_type = get8u(s);
   int tga_is_RLE = 0;
   int tga_palette_start = get16le(s);
   int tga_palette_len = get16le(s);
   int tga_palette_bits = get8u(s);
   int tga_x_origin = get16le(s);
   int tga_y_origin = get16le(s);
   int tga_width = get16le(s);
   int tga_height = get16le(s);
   int tga_bits_per_pixel = get8u(s);
   int tga_inverted = get8u(s);
   //   image data
   unsigned char *tga_data;
   unsigned char *tga_palette = NULL;
   int i, j;
   unsigned char raw_data[4] = { 0,0,0,0 };
   unsigned char trans_data[] = { 0,0,0,0 };
   int RLE_count = 0;
   int RLE_repeating = 0;
   int read_next_pixel = 1;
   //   do a tiny bit of preprocessing: types 9..11 are the RLE forms of 1..3
   if( tga_image_type >= 8 )
   {
      tga_image_type -= 8;
      tga_is_RLE = 1;
   }
   /* int tga_alpha_bits = tga_inverted & 15; */
   //   bit 5 of the last header byte clear => rows need flipping at the end
   tga_inverted = 1 - ((tga_inverted >> 5) & 1);

   //   error check
   if( //(tga_indexed) ||
      (tga_width < 1) || (tga_height < 1) ||
      (tga_image_type < 1) || (tga_image_type > 3) ||
      ((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16) &&
      (tga_bits_per_pixel != 24) && (tga_bits_per_pixel != 32))
      )
   {
      return NULL;
   }

   //   If I'm paletted, then I'll use the number of bits from the palette
   if( tga_indexed )
   {
      tga_bits_per_pixel = tga_palette_bits;
      //   the entry size decides how many bytes get copied into raw_data[4]
      //   and entry 0 is used for bad indices, so both must be validated
      if( (tga_palette_len < 1) ||
         ((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16) &&
         (tga_bits_per_pixel != 24) && (tga_bits_per_pixel != 32)) )
      {
         return NULL;
      }
   }

   //   tga info
   *x = tga_width;
   *y = tga_height;
   if( comp ) *comp = tga_bits_per_pixel / 8;
   if( (req_comp < 1) || (req_comp > 4) )
   {
      //   just use whatever the file was
      req_comp = tga_bits_per_pixel / 8;
   }
   tga_data = (unsigned char*)malloc( tga_width * tga_height * req_comp );
   if( tga_data == NULL )
   {
      return NULL;
   }

   //   skip to the data's starting position (offset usually = 0)
   skip(s, tga_offset );
   //   do I need to load a palette?
   if( tga_indexed )
   {
      //   any data to skip? (offset usually = 0)
      skip(s, tga_palette_start );
      //   load the palette
      tga_palette = (unsigned char*)malloc( tga_palette_len * tga_palette_bits / 8 );
      if( tga_palette == NULL )
      {
         free( tga_data );
         return NULL;
      }
      getn(s, tga_palette, tga_palette_len * tga_palette_bits / 8 );
   }
   //   load the data
   for( i = 0; i < tga_width * tga_height; ++i )
   {
      //   if I'm in RLE mode, do I need to get a RLE chunk?
      if( tga_is_RLE )
      {
         if( RLE_count == 0 )
         {
            //   yep, get the next byte as a RLE command
            int RLE_cmd = get8u(s);
            RLE_count = 1 + (RLE_cmd & 127);
            RLE_repeating = RLE_cmd >> 7;
            read_next_pixel = 1;
         } else if( !RLE_repeating )
         {
            read_next_pixel = 1;
         }
      } else
      {
         read_next_pixel = 1;
      }
      //   OK, if I need to read a pixel, do it now
      if( read_next_pixel )
      {
         //   load however much data we did have
         if( tga_indexed )
         {
            //   read in 1 byte, then perform the lookup
            int pal_idx = get8u(s);
            if( pal_idx >= tga_palette_len )
            {
               //   invalid index
               pal_idx = 0;
            }
            pal_idx *= tga_bits_per_pixel / 8;
            for( j = 0; j*8 < tga_bits_per_pixel; ++j )
            {
               raw_data[j] = tga_palette[pal_idx+j];
            }
         } else
         {
            //   read in the data raw
            for( j = 0; j*8 < tga_bits_per_pixel; ++j )
            {
               raw_data[j] = get8u(s);
            }
         }
         //   convert raw to the intermediate format
         switch( tga_bits_per_pixel )
         {
         case 8:
            //   Luminous => RGBA
            trans_data[0] = raw_data[0];
            trans_data[1] = raw_data[0];
            trans_data[2] = raw_data[0];
            trans_data[3] = 255;
            break;
         case 16:
            //   Luminous,Alpha => RGBA
            trans_data[0] = raw_data[0];
            trans_data[1] = raw_data[0];
            trans_data[2] = raw_data[0];
            trans_data[3] = raw_data[1];
            break;
         case 24:
            //   BGR => RGBA
            trans_data[0] = raw_data[2];
            trans_data[1] = raw_data[1];
            trans_data[2] = raw_data[0];
            trans_data[3] = 255;
            break;
         case 32:
            //   BGRA => RGBA
            trans_data[0] = raw_data[2];
            trans_data[1] = raw_data[1];
            trans_data[2] = raw_data[0];
            trans_data[3] = raw_data[3];
            break;
         }
         //   clear the reading flag for the next pixel
         read_next_pixel = 0;
      } // end of reading a pixel
      //   convert to final format
      switch( req_comp )
      {
      case 1:
         //   RGBA => Luminance
         tga_data[i*req_comp+0] = compute_y(trans_data[0],trans_data[1],trans_data[2]);
         break;
      case 2:
         //   RGBA => Luminance,Alpha
         tga_data[i*req_comp+0] = compute_y(trans_data[0],trans_data[1],trans_data[2]);
         tga_data[i*req_comp+1] = trans_data[3];
         break;
      case 3:
         //   RGBA => RGB
         tga_data[i*req_comp+0] = trans_data[0];
         tga_data[i*req_comp+1] = trans_data[1];
         tga_data[i*req_comp+2] = trans_data[2];
         break;
      case 4:
         //   RGBA => RGBA
         tga_data[i*req_comp+0] = trans_data[0];
         tga_data[i*req_comp+1] = trans_data[1];
         tga_data[i*req_comp+2] = trans_data[2];
         tga_data[i*req_comp+3] = trans_data[3];
         break;
      }
      //   in case we're in RLE mode, keep counting down
      --RLE_count;
   }
   //   do I need to invert the image?
   if( tga_inverted )
   {
      //   swap row j with its mirror row, one byte at a time
      for( j = 0; j*2 < tga_height; ++j )
      {
         int index1 = j * tga_width * req_comp;
         int index2 = (tga_height - 1 - j) * tga_width * req_comp;
         for( i = tga_width * req_comp; i > 0; --i )
         {
            unsigned char temp = tga_data[index1];
            tga_data[index1] = tga_data[index2];
            tga_data[index2] = temp;
            ++index1;
            ++index2;
         }
      }
   }
   //   clear my palette, if I had one
   if( tga_palette != NULL )
   {
      free( tga_palette );
   }
   //   the things I do to get rid of an error message, and yet keep
   //   Microsoft's C compilers happy... [8^(
   tga_palette_start = tga_palette_len = tga_palette_bits =
         tga_x_origin = tga_y_origin = 0;
   //   OK, done
   return tga_data;
}
2990
 
2991
#ifndef STBI_NO_STDIO
2992
// Open filename in binary mode and decode it with stbi_tga_load_from_file().
// Returns that function's result, or NULL if the file cannot be opened.
stbi_uc *stbi_tga_load(char const *filename, int *x, int *y, int *comp, int req_comp)
{
   stbi_uc *pixels = NULL;
   FILE *fp = fopen(filename, "rb");

   if (fp != NULL) {
      pixels = stbi_tga_load_from_file(fp, x, y, comp, req_comp);
      fclose(fp);
   }
   return pixels;
}
3001
 
3002
stbi_uc *stbi_tga_load_from_file   (FILE *f,                  int *x, int *y, int *comp, int req_comp)
3003
{
3004
   stbi s;
3005
   start_file(&s, f);
3006
   return tga_load(&s, x,y,comp,req_comp);
3007
}
3008
#endif
3009
 
3010
// Decode a TGA held in a len-byte memory buffer; the remaining arguments are
// passed straight through to tga_load().
stbi_uc *stbi_tga_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
{
   stbi ctx;

   start_mem(&ctx, buffer, len);
   return tga_load(&ctx, x, y, comp, req_comp);
}
3016
 
3017
 
3018
// *************************************************************************************************
3019
// Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB
3020
 
3021
// Returns 1 when the next 32-bit value read by get32() is the PSD
// signature "8BPS" (0x38425053), 0 otherwise. Consumes those bytes from s.
static int psd_test(stbi *s)
{
   return get32(s) == 0x38425053;
}
3026
 
3027
#ifndef STBI_NO_STDIO
3028
int stbi_psd_test_file(FILE *f)
3029
{
3030
   stbi s;
3031
   int r,n = ftell(f);
3032
   start_file(&s, f);
3033
   r = psd_test(&s);
3034
   fseek(f,n,SEEK_SET);
3035
   return r;
3036
}
3037
#endif
3038
 
3039
// Report whether the len-byte buffer begins with the PSD signature checked
// by psd_test(). Returns the result of psd_test().
int stbi_psd_test_memory(stbi_uc const *buffer, int len)
{
   stbi ctx;

   start_mem(&ctx, buffer, len);
   return psd_test(&ctx);
}
3045
 
3046
// Decode the composited image of a PSD from s into a newly malloc'd buffer.
//   x, y     - receive the image width and height
//   comp     - if non-NULL, receives the channel count stored in the file
//   req_comp - 0 for 4 channels, else the desired count (obtained through
//              convert_format())
// Only version 1, 8-bit, RGB-mode files with raw or RLE data are accepted.
// Channels the file does not provide are filled with 0, or 255 for the
// fourth channel. Failures return the result of epuc(), or NULL when
// convert_format() fails.
static stbi_uc *psd_load(stbi *s, int *x, int *y, int *comp, int req_comp)
{
   int pixelCount;
   int channelCount, compression;
   int channel, i, count, len;
   int w,h;
   uint8 *out;

   // Check identifier
   if (get32(s) != 0x38425053)   // "8BPS"
      return epuc("not PSD", "Corrupt PSD image");

   // Check file type version.
   if (get16(s) != 1)
      return epuc("wrong version", "Unsupported version of PSD image");

   // Skip 6 reserved bytes.
   skip(s, 6 );

   // Read the number of channels (R, G, B, A, etc).
   channelCount = get16(s);
   if (channelCount < 0 || channelCount > 16)
      return epuc("wrong channel count", "Unsupported number of channels in PSD image");

   // Read the rows and columns of the image.
   h = get32(s);
   w = get32(s);

   // Make sure the depth is 8 bits.
   if (get16(s) != 8)
      return epuc("unsupported bit depth", "PSD bit depth is not 8 bit");

   // Make sure the color mode is RGB.
   // Valid options are:
   //   0: Bitmap
   //   1: Grayscale
   //   2: Indexed color
   //   3: RGB color
   //   4: CMYK color
   //   7: Multichannel
   //   8: Duotone
   //   9: Lab color
   if (get16(s) != 3)
      return epuc("wrong color format", "PSD is not in RGB color format");

   // Skip the Mode Data.  (It's the palette for indexed color; other info for other modes.)
   skip(s,get32(s) );

   // Skip the image resources.  (resolution, pen tool paths, etc)
   skip(s, get32(s) );

   // Skip the reserved data.
   skip(s, get32(s) );

   // Find out if the data is compressed.
   // Known values:
   //   0: no compression
   //   1: RLE compressed
   compression = get16(s);
   if (compression > 1)
      return epuc("bad compression", "PSD has an unknown compression format");

   // Create the destination image: always 4 interleaved channels.
   out = (stbi_uc *) malloc(4 * w*h);
   if (!out) return epuc("outofmem", "Out of memory");
   pixelCount = w*h;

   // Finally, the image data.
   if (compression) {
      // RLE as used by .PSD and .TIFF
      // Loop until you get the number of unpacked bytes you are expecting:
      //     Read the next source byte into n.
      //     If n is between 0 and 127 inclusive, copy the next n+1 bytes literally.
      //     Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times.
      //     Else if n is 128, noop.
      // Endloop

      // The RLE-compressed data is preceded by a 2-byte data count for each row in the data,
      // which we're going to just skip.
      skip(s, h * channelCount * 2 );

      // Read the RLE data by channel.
      for (channel = 0; channel < 4; channel++) {
         uint8 *p;

         p = out+channel;
         if (channel >= channelCount) {
            // Fill this channel with default data.
            for (i = 0; i < pixelCount; i++) *p = (channel == 3 ? 255 : 0), p += 4;
         } else {
            // Read the RLE data.
            count = 0;
            while (count < pixelCount) {
               len = get8(s);
               if (len == 128) {
                  // No-op.
               } else if (len < 128) {
                  // Copy next len+1 bytes literally.
                  len++;
                  // a run longer than what is left of the channel would write past out
                  if (len > pixelCount - count) { free(out); return epuc("corrupt", "bad RLE data"); }
                  count += len;
                  while (len) {
                     *p = get8(s);
                     p += 4;
                     len--;
                  }
               } else if (len > 128) {
                  uint32 val;
                  // Next -len+1 bytes in the dest are replicated from next source byte.
                  // (Interpret len as a negative 8-bit int.)
                  len ^= 0x0FF;
                  len += 2;
                  val = get8(s);
                  if (len > pixelCount - count) { free(out); return epuc("corrupt", "bad RLE data"); }
                  count += len;
                  while (len) {
                     *p = val;
                     p += 4;
                     len--;
                  }
               }
            }
         }
      }

   } else {
      // We're at the raw image data.  It's each channel in order (Red, Green, Blue, Alpha, ...)
      // where each channel consists of an 8-bit value for each pixel in the image.

      // Read the data by channel.
      for (channel = 0; channel < 4; channel++) {
         uint8 *p;

         p = out + channel;
         // ">=" as in the RLE branch: channel indices are 0-based, so
         // channel == channelCount is already one the file does not contain
         if (channel >= channelCount) {
            // Fill this channel with default data.
            for (i = 0; i < pixelCount; i++) *p = channel == 3 ? 255 : 0, p += 4;
         } else {
            // Read the data.
            for (i = 0; i < pixelCount; i++)
               *p = get8(s), p += 4;
         }
      }
   }

   if (req_comp && req_comp != 4) {
      out = convert_format(out, 4, req_comp, w, h);
      if (out == NULL) return out; // convert_format frees input on failure
   }

   if (comp) *comp = channelCount;
   *y = h;
   *x = w;

   return out;
}
3204
 
3205
#ifndef STBI_NO_STDIO
3206
// Open filename in binary mode and decode it with stbi_psd_load_from_file().
// Returns that function's result, or NULL if the file cannot be opened.
stbi_uc *stbi_psd_load(char const *filename, int *x, int *y, int *comp, int req_comp)
{
   stbi_uc *pixels = NULL;
   FILE *fp = fopen(filename, "rb");

   if (fp != NULL) {
      pixels = stbi_psd_load_from_file(fp, x, y, comp, req_comp);
      fclose(fp);
   }
   return pixels;
}
3215
 
3216
stbi_uc *stbi_psd_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
3217
{
3218
   stbi s;
3219
   start_file(&s, f);
3220
   return psd_load(&s, x,y,comp,req_comp);
3221
}
3222
#endif
3223
 
3224
// Decode a PSD held in a len-byte memory buffer; the remaining arguments are
// passed straight through to psd_load().
stbi_uc *stbi_psd_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
{
   stbi ctx;

   start_mem(&ctx, buffer, len);
   return psd_load(&ctx, x, y, comp, req_comp);
}
3230
 
3231
 
3232
// *************************************************************************************************
3233
// Radiance RGBE HDR loader
3234
// originally by Nicolas Schulz
3235
#ifndef STBI_NO_HDR
3236
// Returns 1 when the stream starts with the line "#?RADIANCE\n", 0 at the
// first byte that differs. Consumes the bytes it compares.
static int hdr_test(stbi *s)
{
   const char *expected;

   for (expected = "#?RADIANCE\n"; *expected != '\0'; ++expected) {
      if (get8(s) != *expected)
         return 0;
   }
   return 1;
}
3245
 
3246
// Report whether the len-byte buffer begins with the signature checked by
// hdr_test(). Returns the result of hdr_test().
int stbi_hdr_test_memory(stbi_uc const *buffer, int len)
{
   stbi ctx;

   start_mem(&ctx, buffer, len);
   return hdr_test(&ctx);
}
3252
 
3253
#ifndef STBI_NO_STDIO
3254
int stbi_hdr_test_file(FILE *f)
3255
{
3256
   stbi s;
3257
   int r,n = ftell(f);
3258
   start_file(&s, f);
3259
   r = hdr_test(&s);
3260
   fseek(f,n,SEEK_SET);
3261
   return r;
3262
}
3263
#endif
3264
 
3265
#define HDR_BUFLEN  1024
3266
// Read one text line from z into buffer (which must hold HDR_BUFLEN bytes)
// and NUL-terminate it. The newline is not stored. At most HDR_BUFLEN-1
// characters are kept; the remainder of an over-long line is read and
// discarded. Returns buffer.
static char *hdr_gettoken(stbi *z, char *buffer)
{
   int used = 0;
   char ch = get8(z);

   for (;;) {
      if (at_eof(z) || ch == '\n')
         break;
      buffer[used++] = ch;
      if (used == HDR_BUFLEN-1) {
         // buffer full: flush to end of line
         while (!at_eof(z) && get8(z) != '\n') {
            /* discard */
         }
         break;
      }
      ch = get8(z);
   }

   buffer[used] = 0;
   return buffer;
}
3288
 
3289
// Convert one 4-byte RGBE pixel (input[0..2] mantissas, input[3] exponent)
// to req_comp floats in output.
//   req_comp 1/2: average of the three channels (plus alpha 1 when 2)
//   req_comp 3/4: the three channels (plus alpha 1 when 4)
// A zero exponent byte produces zeros for the colour values.
static void hdr_convert(float *output, stbi_uc *input, int req_comp)
{
   float scale;

   if (input[3] == 0) {
      // zero exponent: black, alpha (if any) still 1
      if (req_comp == 4) {
         output[3] = 1;
         output[0] = output[1] = output[2] = 0;
      } else if (req_comp == 3) {
         output[0] = output[1] = output[2] = 0;
      } else if (req_comp == 2) {
         output[1] = 1;
         output[0] = 0;
      } else if (req_comp == 1) {
         output[0] = 0;
      }
      return;
   }

   // Exponent
   scale = (float) ldexp(1.0f, input[3] - (int)(128 + 8));
   if (req_comp <= 2) {
      output[0] = (input[0] + input[1] + input[2]) * scale / 3;
   } else {
      output[0] = input[0] * scale;
      output[1] = input[1] * scale;
      output[2] = input[2] * scale;
   }
   if (req_comp == 2) output[1] = 1;
   if (req_comp == 4) output[3] = 1;
}
3315
 
3316
 
3317
// Decode a Radiance RGBE (.hdr) image from s into a newly malloc'd array of
// height * width * req_comp floats.
//   x, y     - receive the image width and height
//   comp     - if non-NULL, receives 3
//   req_comp - floats per pixel to produce; 0 means 3
// Only "FORMAT=32-bit_rle_rgbe" files with a "-Y <h> +X <w>" resolution line
// are accepted. Failures return the result of epf().
static float *hdr_load(stbi *s, int *x, int *y, int *comp, int req_comp)
{
   char buffer[HDR_BUFLEN];
   char *token;
   int valid = 0;
   int width, height;
   stbi_uc *scanline;
   float *hdr_data;
   int len;
   unsigned char count, value;
   int i, j, k, c1,c2, z;


   // Check identifier
   if (strcmp(hdr_gettoken(s,buffer), "#?RADIANCE") != 0)
      return epf("not HDR", "Corrupt HDR image");

   // Parse header: lines up to the first empty one
   while(1) {
      token = hdr_gettoken(s,buffer);
      if (token[0] == 0) break;
      if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
   }

   if (!valid)    return epf("unsupported format", "Unsupported HDR format");

   // Parse width and height
   // can't use sscanf() if we're not using stdio!
   token = hdr_gettoken(s,buffer);
   if (strncmp(token, "-Y ", 3))  return epf("unsupported data layout", "Unsupported HDR format");
   token += 3;
   height = strtol(token, &token, 10);
   while (*token == ' ') ++token;
   if (strncmp(token, "+X ", 3))  return epf("unsupported data layout", "Unsupported HDR format");
   token += 3;
   width = strtol(token, NULL, 10);

   *x = width;
   *y = height;

   if (comp) *comp = 3;
   if (req_comp == 0) req_comp = 3;

   // Read data
   hdr_data = (float *) malloc(height * width * req_comp * sizeof(float));
   if (!hdr_data) return epf("outofmem", "Out of memory");

   // Load image data
   // image data is stored as some number of scan lines
   if( width < 8 || width >= 32768) {
      // Read flat data
      for (j=0; j < height; ++j) {
         for (i=0; i < width; ++i) {
            stbi_uc rgbe[4];
           main_decode_loop:
            getn(s, rgbe, 4);
            hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp);
         }
      }
   } else {
      // Read RLE-encoded data
      scanline = NULL;

      for (j = 0; j < height; ++j) {
         c1 = get8(s);
         c2 = get8(s);
         len = get8(s);
         if (c1 != 2 || c2 != 2 || (len & 0x80)) {
            // not run-length encoded, so we have to actually use THIS data as a decoded
            // pixel (note this can't be a valid pixel--one of RGB must be >= 128)
            stbi_uc rgbe[4] = { c1,c2,len, get8(s) };
            hdr_convert(hdr_data, rgbe, req_comp);
            // pixel 0 is done; continue the flat reader at row 0, column 1
            i = 1;
            j = 0;
            free(scanline);
            goto main_decode_loop; // jumps into the flat-data loops above; the format forces this
         }
         len <<= 8;
         len |= get8(s);
         if (len != width) { free(hdr_data); free(scanline); return epf("invalid decoded scanline length", "corrupt HDR"); }
         if (scanline == NULL) {
            scanline = (stbi_uc *) malloc(width * 4);
            if (!scanline) { free(hdr_data); return epf("outofmem", "Out of memory"); }
         }

         // each of the 4 components is stored as its own run-length-coded row
         for (k = 0; k < 4; ++k) {
            i = 0;
            while (i < width) {
               count = get8(s);
               if (count > 128) {
                  // Run
                  value = get8(s);
                  count -= 128;
                  // a run longer than the rest of the row would write past scanline
                  if (count > width - i) { free(hdr_data); free(scanline); return epf("corrupt", "bad RLE data in HDR"); }
                  for (z = 0; z < count; ++z)
                     scanline[i++ * 4 + k] = value;
               } else {
                  // Dump
                  // a zero count makes no progress; an oversized one overruns the row
                  if (count == 0 || count > width - i) { free(hdr_data); free(scanline); return epf("corrupt", "bad RLE data in HDR"); }
                  for (z = 0; z < count; ++z)
                     scanline[i++ * 4 + k] = get8(s);
               }
            }
         }
         for (i=0; i < width; ++i)
            hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp);
      }
      free(scanline);
   }

   return hdr_data;
}
3423
 
3424
// Decode a Radiance RGBE (.hdr) image from s into a newly malloc'd array of
// height * width raw 4-byte RGBE pixels (no float conversion).
//   x, y     - receive the image width and height
//   comp     - if non-NULL, receives 4
//   req_comp - ignored; the output always has 4 bytes per pixel
// Only "FORMAT=32-bit_rle_rgbe" files with a "-Y <h> +X <w>" resolution line
// are accepted. Failures return the result of epuc().
static stbi_uc *hdr_load_rgbe(stbi *s, int *x, int *y, int *comp, int req_comp)
{
   char buffer[HDR_BUFLEN];
   char *token;
   int valid = 0;
   int width, height;
   stbi_uc *scanline;
   stbi_uc *rgbe_data;
   int len;
   unsigned char count, value;
   int i, j, k, c1,c2, z;


   // Check identifier
   if (strcmp(hdr_gettoken(s,buffer), "#?RADIANCE") != 0)
      return epuc("not HDR", "Corrupt HDR image");

   // Parse header: lines up to the first empty one
   while(1) {
      token = hdr_gettoken(s,buffer);
      if (token[0] == 0) break;
      if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
   }

   if (!valid)    return epuc("unsupported format", "Unsupported HDR format");

   // Parse width and height
   // can't use sscanf() if we're not using stdio!
   token = hdr_gettoken(s,buffer);
   if (strncmp(token, "-Y ", 3))  return epuc("unsupported data layout", "Unsupported HDR format");
   token += 3;
   height = strtol(token, &token, 10);
   while (*token == ' ') ++token;
   if (strncmp(token, "+X ", 3))  return epuc("unsupported data layout", "Unsupported HDR format");
   token += 3;
   width = strtol(token, NULL, 10);

   *x = width;
   *y = height;

   // RGBE _MUST_ come out as 4 components
   if (comp) *comp = 4;
   req_comp = 4;

   // Read data
   rgbe_data = (stbi_uc *) malloc(height * width * req_comp * sizeof(stbi_uc));
   if (!rgbe_data) return epuc("outofmem", "Out of memory");
   //   point to the beginning; scanline is the write cursor from here on
   scanline = rgbe_data;

   // Load image data
   // image data is stored as some number of scan lines
   if( width < 8 || width >= 32768) {
      // Read flat data
      for (j=0; j < height; ++j) {
         for (i=0; i < width; ++i) {
           main_decode_loop:
            getn(s,scanline, 4);
            scanline += 4;
         }
      }
   } else {
      // Read RLE-encoded data
      for (j = 0; j < height; ++j) {
         c1 = get8(s);
         c2 = get8(s);
         len = get8(s);
         if (c1 != 2 || c2 != 2 || (len & 0x80)) {
            // not run-length encoded, so we have to actually use THIS data as a decoded
            // pixel (note this can't be a valid pixel--one of RGB must be >= 128)
            // The flat reader restarts at row 0, so the cursor must restart
            // too; otherwise a fallback on a later row would run past the buffer.
            scanline = rgbe_data;
            scanline[0] = c1;
            scanline[1] = c2;
            scanline[2] = len;
            scanline[3] = get8(s);
            scanline += 4;
            i = 1;
            j = 0;
            goto main_decode_loop; // jumps into the flat-data loops above; the format forces this
         }
         len <<= 8;
         len |= get8(s);
         if (len != width) { free(rgbe_data); return epuc("invalid decoded scanline length", "corrupt HDR"); }
         // each of the 4 components is stored as its own run-length-coded row
         for (k = 0; k < 4; ++k) {
            i = 0;
            while (i < width) {
               count = get8(s);
               if (count > 128) {
                  // Run
                  value = get8(s);
                  count -= 128;
                  // a run longer than the rest of the row would write past it
                  if (count > width - i) { free(rgbe_data); return epuc("corrupt", "bad RLE data in HDR"); }
                  for (z = 0; z < count; ++z)
                     scanline[i++ * 4 + k] = value;
               } else {
                  // Dump
                  // a zero count makes no progress; an oversized one overruns the row
                  if (count == 0 || count > width - i) { free(rgbe_data); return epuc("corrupt", "bad RLE data in HDR"); }
                  for (z = 0; z < count; ++z)
                     scanline[i++ * 4 + k] = get8(s);
               }
            }
         }
         //   move the scanline on
         scanline += 4 * width;
      }
   }

   return rgbe_data;
}
3530
 
3531
#ifndef STBI_NO_STDIO
3532
float *stbi_hdr_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
3533
{
3534
   stbi s;
3535
   start_file(&s,f);
3536
   return hdr_load(&s,x,y,comp,req_comp);
3537
}
3538
 
3539
stbi_uc *stbi_hdr_load_rgbe_file(FILE *f, int *x, int *y, int *comp, int req_comp)
3540
{
3541
   stbi s;
3542
   start_file(&s,f);
3543
   return hdr_load_rgbe(&s,x,y,comp,req_comp);
3544
}
3545
 
3546
// Load the RGBE bytes of an HDR image from the file named 'filename'.
// The file is opened in binary mode, handed to stbi_hdr_load_rgbe_file(),
// and closed again before returning. If the file cannot be opened the
// result of epuc("can't fopen", ...) is returned instead.
stbi_uc *stbi_hdr_load_rgbe(char const *filename, int *x, int *y, int *comp, int req_comp)
{
   unsigned char *pixels;
   FILE *fp = fopen(filename, "rb");

   if (fp == NULL)
      return epuc("can't fopen", "Unable to open file");

   pixels = stbi_hdr_load_rgbe_file(fp, x, y, comp, req_comp);
   fclose(fp);
   return pixels;
}
3555
#endif
3556
 
3557
// Decode an HDR image held in a memory buffer of 'len' bytes.
// A reader context is initialised over the buffer with start_mem() and the
// decoding is delegated to hdr_load(); its result is returned unchanged.
float *stbi_hdr_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
{
   stbi reader;
   float *pixels;

   start_mem(&reader, buffer, len);
   pixels = hdr_load(&reader, x, y, comp, req_comp);
   return pixels;
}
3563
 
3564
// Load the RGBE bytes of an HDR image held in a memory buffer of 'len' bytes.
// A reader context is initialised over the buffer with start_mem() and the
// work is delegated to hdr_load_rgbe(); its result is returned unchanged.
stbi_uc *stbi_hdr_load_rgbe_memory(stbi_uc *buffer, int len, int *x, int *y, int *comp, int req_comp)
{
   stbi reader;
   stbi_uc *rgbe;

   start_mem(&reader, buffer, len);
   rgbe = hdr_load_rgbe(&reader, x, y, comp, req_comp);
   return rgbe;
}
3570
 
3571
#endif // STBI_NO_HDR
3572
 
3573
/////////////////////// write image ///////////////////////
3574
 
3575
#ifndef STBI_NO_WRITE
3576
 
3577
// Narrow x to a uint8 and write that single value to f.
static void write8(FILE *f, int x)
{
   uint8 byte = (uint8) x;
   fwrite(&byte, 1, 1, f);
}
3578
 
3579
static void writefv(FILE *f, char *fmt, va_list v)
3580
{
3581
   while (*fmt) {
3582
      switch (*fmt++) {
3583
         case ' ': break;
3584
         case '1': { uint8 x = va_arg(v, int); write8(f,x); break; }
3585
         case '2': { int16 x = va_arg(v, int); write8(f,x); write8(f,x>>8); break; }
3586
         case '4': { int32 x = va_arg(v, int); write8(f,x); write8(f,x>>8); write8(f,x>>16); write8(f,x>>24); break; }
3587
         default:
3588
            assert(0);
3589
            va_end(v);
3590
            return;
3591
      }
3592
   }
3593
}
3594
 
3595
// Variadic front end for writefv(): collects the trailing arguments into a
// va_list, lets writefv() emit them to f according to fmt, then releases
// the list.
static void writef(FILE *f, char *fmt, ...)
{
   va_list args;

   va_start(args, fmt);
   writefv(f, fmt, args);
   va_end(args);
}
3602
 
3603
static void write_pixels(FILE *f, int rgb_dir, int vdir, int x, int y, int comp, void *data, int write_alpha, int scanline_pad)
3604
{
3605
   uint8 bg[3] = { 255, 0, 255}, px[3];
3606
   uint32 zero = 0;
3607
   int i,j,k, j_end;
3608
 
3609
   if (vdir < 0)
3610
      j_end = -1, j = y-1;
3611
   else
3612
      j_end =  y, j = 0;
3613
 
3614
   for (; j != j_end; j += vdir) {
3615
      for (i=0; i < x; ++i) {
3616
         uint8 *d = (uint8 *) data + (j*x+i)*comp;
3617
         if (write_alpha < 0)
3618
            fwrite(&d[comp-1], 1, 1, f);
3619
         switch (comp) {
3620
            case 1:
3621
            case 2: writef(f, "111", d[0],d[0],d[0]);
3622
                    break;
3623
            case 4:
3624
               if (!write_alpha) {
3625
                  for (k=0; k < 3; ++k)
3626
                     px[k] = bg[k] + ((d[k] - bg[k]) * d[3])/255;
3627
                  writef(f, "111", px[1-rgb_dir],px[1],px[1+rgb_dir]);
3628
                  break;
3629
               }
3630
               /* FALLTHROUGH */
3631
            case 3:
3632
               writef(f, "111", d[1-rgb_dir],d[1],d[1+rgb_dir]);
3633
               break;
3634
         }
3635
         if (write_alpha > 0)
3636
            fwrite(&d[comp-1], 1, 1, f);
3637
      }
3638
      fwrite(&zero,scanline_pad,1,f);
3639
   }
3640
}
3641
 
3642
// Create 'filename', write a header described by fmt and the trailing
// arguments (see writefv), then write the pixel data via write_pixels().
// Returns 1 if the file could be opened for writing, 0 otherwise; write
// errors after a successful open are not reported.
static int outfile(char const *filename, int rgb_dir, int vdir, int x, int y, int comp, void *data, int alpha, int pad, char *fmt, ...)
{
   va_list header_args;
   FILE *out = fopen(filename, "wb");

   if (out == NULL)
      return 0;

   va_start(header_args, fmt);
   writefv(out, fmt, header_args);
   va_end(header_args);

   write_pixels(out, rgb_dir, vdir, x, y, comp, data, alpha, pad);
   fclose(out);
   return 1;
}
3655
 
3656
// Write an x-by-y image with 'comp' bytes per pixel to 'filename' as a
// 24-bit BMP. Rows are padded to a multiple of four bytes and are written
// last-row-first with the colour bytes reversed (both directions are -1);
// no alpha byte is stored. Returns the result of outfile(): non-zero when
// the file could be opened, 0 otherwise.
int stbi_write_bmp(char const *filename, int x, int y, int comp, void *data)
{
   enum { FILE_HEADER_BYTES = 14, BITMAP_HEADER_BYTES = 40 };
   int pad = (-x*3) & 3;
   int pixel_offset = FILE_HEADER_BYTES + BITMAP_HEADER_BYTES;
   int file_size = pixel_offset + (x*3 + pad)*y;

   return outfile(filename, -1, -1, x, y, comp, data, 0, pad,
           "11 4 22 4" "4 44 22 444444",
           'B', 'M', file_size, 0, 0, pixel_offset,                // file header
           BITMAP_HEADER_BYTES, x, y, 1, 24, 0, 0, 0, 0, 0, 0);    // bitmap header
}
3664
 
3665
// Write an x-by-y image with 'comp' bytes per pixel to 'filename' as a TGA.
// An alpha byte is stored per pixel when comp is even, giving 32 bits per
// pixel instead of 24. Rows are written last-row-first with the colour
// bytes reversed (both directions are -1). Returns the result of outfile():
// non-zero when the file could be opened, 0 otherwise.
int stbi_write_tga(char const *filename, int x, int y, int comp, void *data)
{
   int has_alpha = (comp & 1) == 0;
   int bits_per_pixel = 24 + 8*has_alpha;
   int alpha_bits = 8*has_alpha;

   return outfile(filename, -1, -1, x, y, comp, data, has_alpha, 0,
                  "111 221 2222 11",
                  0, 0, 2,
                  0, 0, 0,
                  0, 0, x, y,
                  bits_per_pixel, alpha_bits);
}
3671
 
3672
// any other image formats that do interleaved rgb data?
3673
//    PNG: requires adler32,crc32 -- significant amount of code
3674
//    PSD: no, channels output separately
3675
//    TIFF: no, stripwise-interleaved... i think
3676
 
3677
#endif // STBI_NO_WRITE
3678
 
3679
//	add in my DDS loading support
3680
#ifndef STBI_NO_DDS
3681
#include "stbi_DDS_aug_c.h"
3682
#endif