diff --git a/Conv2D_8h_source.html b/Conv2D_8h_source.html new file mode 100644 index 00000000..132a9e53 --- /dev/null +++ b/Conv2D_8h_source.html @@ -0,0 +1,129 @@ + + + + + + + +TinyChatEngine: llm/include/ops/Conv2D.h Source File + + + + + + + + + + + +
+
+ + + + + + +
+
TinyChatEngine +
+
+
+ + + + + + + + + +
+
+ + +
+
+
+
+
+
Loading...
+
Searching...
+
No Matches
+
+
+
+
+ + +
+
+
Conv2D.h
+
+
+
1#include "common.h"
+
2#include <limits>
+
3
+
+ +
5 Matrix4D<float> weight;
+ +
7 int stride_width = 1;
+
8 int stride_height = 1;
+
9 int dilation_width_factor = 1;
+
10 int dilation_height_factor = 1;
+
11 int padding_width = 0;
+
12 int padding_height = 0;
+
13 float float_activation_min = -std::numeric_limits<float>::max();
+
14 float float_activation_max = std::numeric_limits<float>::max();
+
15};
+
+
16
+
+
17class Conv2D {
+
18 public:
+
19 Conv2D(Conv2D_params params_) : params(params_){};
+
20 Conv2D(){};
+
21 void forward(const Matrix3D<float> &input, Matrix3D<float> &output);
+
22 struct Conv2D_params params;
+
23 bool has_bias = false;
+
24
+
25 private:
+
26 std::string profile_name = "Conv2D";
+
27};
+
+
28
+
29void load_Conv2D(Conv2D &op, std::string prefix);
+
Definition Conv2D.h:17
+
Definition common.h:34
+
Definition common.h:129
+
Definition Conv2D.h:4
+
+ + + + diff --git a/Fp32CLIPAttention_8h_source.html b/Fp32CLIPAttention_8h_source.html new file mode 100644 index 00000000..37175fdd --- /dev/null +++ b/Fp32CLIPAttention_8h_source.html @@ -0,0 +1,150 @@ + + + + + + + +TinyChatEngine: llm/include/nn_modules/Fp32CLIPAttention.h Source File + + + + + + + + + + + +
+
+ + + + + + +
+
TinyChatEngine +
+
+
+ + + + + + + + + +
+
+ + +
+
+
+
+
+
Loading...
+
Searching...
+
No Matches
+
+
+
+
+ + +
+
+
Fp32CLIPAttention.h
+
+
+
1#include <utility>
+
2
+
3#include "common.h"
+
4#include "operators.h"
+
5
+
+ +
7 Matrix3D<float> attn_output;
+
8 Matrix3D<float> attn_probs_reshaped;
+
9 std::pair<Matrix3D<float>, Matrix3D<float>> past_key_value;
+
10};
+
+
+ +
12 Matrix3D<float> hidden_states;
+
13 Matrix3D<float> attention_mask;
+
14 Matrix3D<float> past_key, past_value;
+
15 bool has_past_key_value = false;
+
16 int layer_idx;
+
17
+
18 Fp32CLIPAttention_input(Matrix3D<float> hidden_states_, Matrix3D<float> attention_mask_, int layer_idx_)
+
19 : hidden_states(hidden_states_), attention_mask(attention_mask_), layer_idx(layer_idx_) {}
+
20
+
21 Fp32CLIPAttention_input(Matrix3D<float> hidden_states_, Matrix3D<float> attention_mask_, Matrix3D<float> past_key_,
+
22 Matrix3D<float> past_value_, bool has_past_key_value_, int layer_idx_)
+
23 : hidden_states(hidden_states_),
+
24 attention_mask(attention_mask_),
+
25 past_key(past_key_),
+
26 past_value(past_value_),
+
27 has_past_key_value(has_past_key_value_),
+
28 layer_idx(layer_idx_) {}
+
29};
+
+
30
+
+ +
32 public:
+
33 Fp32CLIPAttention(std::string param_path, const struct model_config config);
+ +
35 static void initialized_memory(const struct model_config config);
+
36 struct Fp32CLIPAttention_output forward(const struct Fp32CLIPAttention_input &input);
+
37
+
38 private:
+
39 void unshape(Matrix3D<float> shaped, Matrix3D<float> unshape, int sqlen);
+
40 void shape(Matrix3D<float> unshape, Matrix3D<float> shaped, int sqlen);
+
41 int embed_dim, num_heads, head_dim;
+
42 Linear_FP k_proj, v_proj, q_proj, out_proj;
+
43 BMM_F32T qk_bmm, pv_bmm;
+
44 std::string profile_name = "Fp32CLIPAttention";
+
45};
+
+
Definition BMM_F32T.h:3
+
Definition Fp32CLIPAttention.h:31
+
Definition linear.h:6
+
Definition common.h:34
+
Definition Fp32CLIPAttention.h:11
+
Definition Fp32CLIPAttention.h:6
+
Definition model.h:5
+
+ + + + diff --git a/Fp32CLIPEncoderLayer_8h_source.html b/Fp32CLIPEncoderLayer_8h_source.html new file mode 100644 index 00000000..3d77023f --- /dev/null +++ b/Fp32CLIPEncoderLayer_8h_source.html @@ -0,0 +1,155 @@ + + + + + + + +TinyChatEngine: llm/include/nn_modules/Fp32CLIPEncoderLayer.h Source File + + + + + + + + + + + +
+
+ + + + + + +
+
TinyChatEngine +
+
+
+ + + + + + + + + +
+
+ + +
+
+
+
+
+
Loading...
+
Searching...
+
No Matches
+
+
+
+
+ + +
+
+
Fp32CLIPEncoderLayer.h
+
+
+
1#include "Fp32CLIPAttention.h"
+
2#include "common.h"
+
3#include "operators.h"
+
4
+
+ +
6 Matrix3D<float> hidden_states;
+
7 Matrix3D<float> attentions;
+
8 std::pair<Matrix3D<float>, Matrix3D<float>> past_key_value;
+
9
+ +
11 std::pair<Matrix3D<float>, Matrix3D<float>> past_key_value_) {
+
12 hidden_states = hidden_states_;
+
13 attentions = attentions_;
+
14 past_key_value = past_key_value_;
+
15 };
+
16};
+
+
+ +
18 Matrix3D<float> hidden_states;
+
19 Matrix3D<float> attention_mask;
+
20 Matrix3D<float> past_key, past_value;
+
21 bool has_past_key_value = false;
+
22
+
23 Fp32CLIPEncoderLayer_input(Matrix3D<float> &hidden_states_, Matrix3D<float> attention_mask_) {
+
24 hidden_states = hidden_states_;
+
25 attention_mask = attention_mask_;
+
26 has_past_key_value = false;
+
27 }
+
28
+
29 Fp32CLIPEncoderLayer_input(Matrix3D<float> &hidden_states_, Matrix3D<float> attention_mask_,
+
30 Matrix3D<float> past_key_, Matrix3D<float> past_value_) {
+
31 hidden_states = hidden_states_;
+
32 attention_mask = attention_mask_;
+
33 past_key = past_key_;
+
34 past_value = past_value_;
+
35 has_past_key_value = true;
+
36 }
+
37};
+
+
38
+
+ +
40 public:
+
41 Fp32CLIPEncoderLayer(std::string param_path, const struct model_config config, int layer_idx);
+
42 struct Fp32CLIPEncoderLayer_output forward(const struct Fp32CLIPEncoderLayer_input &input);
+
43
+
44 int embed_dim, num_attention_heads, hidden_dim, layer_idx;
+
45 LayerNorm layer_norm1, layer_norm2;
+
46 Linear_FP mlp_fc1, mlp_fc2;
+ +
48 std::string profile_name = "Fp32CLIPEncoderLayer";
+
49};
+
+
Definition Fp32CLIPAttention.h:31
+
Definition Fp32CLIPEncoderLayer.h:39
+
Definition LayerNorm.h:8
+
Definition linear.h:6
+
Definition common.h:34
+
Definition Fp32CLIPEncoderLayer.h:17
+
Definition Fp32CLIPEncoderLayer.h:5
+
Definition model.h:5
+
+ + + + diff --git a/Fp32CLIPEncoder_8h_source.html b/Fp32CLIPEncoder_8h_source.html new file mode 100644 index 00000000..7673617f --- /dev/null +++ b/Fp32CLIPEncoder_8h_source.html @@ -0,0 +1,140 @@ + + + + + + + +TinyChatEngine: llm/include/nn_modules/Fp32CLIPEncoder.h Source File + + + + + + + + + + + +
+
+ + + + + + +
+
TinyChatEngine +
+
+
+ + + + + + + + + +
+
+ + +
+
+
+
+
+
Loading...
+
Searching...
+
No Matches
+
+
+
+
+ + +
+
+
Fp32CLIPEncoder.h
+
+
+
1#include <cstdlib>
+
2#include <string>
+
3#include <vector>
+
4
+
5#include "Fp32CLIPEncoderLayer.h"
+
6#include "common.h"
+
7#include "operators.h"
+
8
+
+ +
10 Matrix3D<float> last_hidden_state;
+
11 std::vector<Matrix3D<float>> past_keys, past_values;
+
12};
+
+
+ +
14 Matrix3D<float> hidden_states;
+
15 Matrix3D<float> attention_mask;
+
16 std::vector<Matrix3D<float>> past_keys, past_values;
+
17 bool has_past_keys_values;
+
18
+
19 Fp32CLIPEncoder_input(Matrix3D<float> hidden_states_, Matrix3D<float> attention_mask_)
+
20 : hidden_states(hidden_states_), attention_mask(attention_mask_) {
+
21 has_past_keys_values = false;
+
22 }
+
23 Fp32CLIPEncoder_input(Matrix3D<float> hidden_states_, Matrix3D<float> attention_mask_,
+
24 std::vector<Matrix3D<float>> past_keys_, std::vector<Matrix3D<float>> past_values_)
+
25 : hidden_states(hidden_states_), attention_mask(attention_mask_), past_keys(past_keys_), past_values(past_values_) {
+
26 has_past_keys_values = true;
+
27 }
+
28};
+
+
29
+
+ +
31 public:
+
32 Fp32CLIPEncoder(std::string param_path, const struct model_config config);
+ +
34 struct Fp32CLIPEncoder_output forward(const struct Fp32CLIPEncoder_input& input);
+
35 std::vector<Fp32CLIPEncoderLayer> layers;
+
36 std::string profile_name = "Fp32CLIPEncoder";
+
37};
+
+
Definition Fp32CLIPEncoder.h:30
+
Definition common.h:34
+
Definition Fp32CLIPEncoder.h:13
+
Definition Fp32CLIPEncoder.h:9
+
Definition model.h:5
+
+ + + + diff --git a/Fp32CLIPVisionTransformer_8h_source.html b/Fp32CLIPVisionTransformer_8h_source.html new file mode 100644 index 00000000..f1474c38 --- /dev/null +++ b/Fp32CLIPVisionTransformer_8h_source.html @@ -0,0 +1,159 @@ + + + + + + + +TinyChatEngine: llm/include/nn_modules/Fp32CLIPVisionTransformer.h Source File + + + + + + + + + + + +
+
+ + + + + + +
+
TinyChatEngine +
+
+
+ + + + + + + + + +
+
+ + +
+
+
+
+
+
Loading...
+
Searching...
+
No Matches
+
+
+
+
+ + +
+
+
Fp32CLIPVisionTransformer.h
+
+
+
1#include <cstdlib>
+
2#include <string>
+
3#include <vector>
+
4
+
5#include "Fp32CLIPEncoder.h"
+
6#include "common.h"
+
7#include "operators.h"
+
8
+
+ +
10 Matrix3D<float> last_hidden_state;
+
11 std::vector<Matrix3D<float>> past_keys, past_values;
+
12};
+
+
+ +
14 Matrix3D<float> input_image;
+
15 std::vector<Matrix3D<float>> past_keys, past_values;
+
16 bool has_past_keys_values;
+
17
+ +
19 Fp32CLIPVisionTransformer_input(Matrix3D<float> input_image_) : input_image(input_image_) { has_past_keys_values = false; }
+
20 Fp32CLIPVisionTransformer_input(Matrix3D<float> input_image_, std::vector<Matrix3D<float>> past_keys_,
+
21 std::vector<Matrix3D<float>> past_values_)
+
22 : input_image(input_image_), past_keys(past_keys_), past_values(past_values_) {
+
23 has_past_keys_values = true;
+
24 }
+
25};
+
+
26
+
+ +
28 public:
+
29 Fp32CLIPVisionTransformer(std::string param_path, const struct model_config config);
+ +
31 struct Fp32CLIPVisionTransformer_output forward(const struct Fp32CLIPVisionTransformer_input& input);
+
32 Embedding embed_positions;
+
33 Conv2D embed_patch;
+
34 LayerNorm pre_layernorm;
+
35 Linear_FP mm_proj_0, mm_proj_2;
+
36 int voc_size, embed_dim, padding_idx, hidden_dim, num_heads, image_size, patch_size, num_patches, num_positions,
+
37 projection_dim, mmproj_dim;
+
38 std::vector<Fp32CLIPEncoderLayer> layers;
+
39 std::string profile_name = "Fp32CLIPVisionTransformer";
+
40
+
41 private:
+
42 Fp32CLIPEncoder encoder;
+
43 float* patch_embeds_buf;
+
44 float* class_embeds_buf;
+
45 float* pos_embeds_buf;
+
46 float* last_hidden_states_buf;
+
47 float* hidden_states_buf;
+
48 float* embeddings_buf;
+
49 float* mm_proj_0_arr;
+
50 float* mm_proj_2_arr;
+
51};
+
+
Definition Conv2D.h:17
+
Definition Embedding.h:5
+
Definition Fp32CLIPEncoder.h:30
+
Definition Fp32CLIPVisionTransformer.h:27
+
Definition LayerNorm.h:8
+
Definition linear.h:6
+
Definition common.h:34
+
Definition Fp32CLIPVisionTransformer.h:13
+
Definition Fp32CLIPVisionTransformer.h:9
+
Definition model.h:5
+
+ + + + diff --git a/Fp32llamaDecoder_8h_source.html b/Fp32llamaDecoder_8h_source.html index ff5242bc..ad3ddb23 100644 --- a/Fp32llamaDecoder_8h_source.html +++ b/Fp32llamaDecoder_8h_source.html @@ -100,41 +100,63 @@
13struct Fp32llamaDecoder_input {
14 Matrix3D<int> input_ids;
-
15 std::vector<Matrix3D<float>> past_keys, past_values;
-
16 bool has_past_keys_values;
-
17
-
18 Fp32llamaDecoder_input(Matrix3D<int> input_ids_) : input_ids(input_ids_) { has_past_keys_values = false; }
-
19 Fp32llamaDecoder_input(Matrix3D<int> input_ids_, std::vector<Matrix3D<float>> past_keys_,
-
20 std::vector<Matrix3D<float>> past_values_)
-
21 : input_ids(input_ids_), past_keys(past_keys_), past_values(past_values_) {
-
22 has_past_keys_values = true;
-
23 }
-
24};
+
15 Matrix3D<float> image_embed;
+
16 Matrix3D<int> second_input_ids;
+
17 std::vector<Matrix3D<float>> past_keys, past_values;
+
18 bool has_past_keys_values;
+
19 bool is_llava;
+
20
+
21 Fp32llamaDecoder_input() {}
+
22 Fp32llamaDecoder_input(Matrix3D<int> input_ids_) : input_ids(input_ids_) {
+
23 has_past_keys_values = false;
+
24 is_llava = false;
+
25 }
+
26 Fp32llamaDecoder_input(Matrix3D<int> input_ids_, std::vector<Matrix3D<float>> past_keys_,
+
27 std::vector<Matrix3D<float>> past_values_)
+
28 : input_ids(input_ids_), past_keys(past_keys_), past_values(past_values_) {
+
29 has_past_keys_values = true;
+
30 is_llava = false;
+
31 }
+
32 Fp32llamaDecoder_input(Matrix3D<int> input_ids_, Matrix3D<float> image_embed_, Matrix3D<int> second_input_ids_)
+
33 : input_ids(input_ids_), image_embed(image_embed_), second_input_ids(second_input_ids_) {
+
34 has_past_keys_values = false;
+
35 is_llava = true;
+
36 }
+
37 Fp32llamaDecoder_input(Matrix3D<int> input_ids_, Matrix3D<float> image_embed_)
+
38 : input_ids(input_ids_), image_embed(image_embed_) {
+
39 has_past_keys_values = false;
+
40 is_llava = true;
+
41 }
+
42};
-
25
-
-
26class Fp32llamaDecoder {
-
27 public:
-
28 Fp32llamaDecoder(std::string param_path, const struct model_config config);
-
29 Fp32llamaDecoder(){};
-
30 Matrix3D<float> prepare_decoder_attention_mask(int length, int past_length);
-
31 struct Fp32llamaDecoder_output forward(const struct Fp32llamaDecoder_input& input);
-
32 Embedding embed_tokens;
-
33 LlamaRMSNorm norm;
-
34 float rms_norm_eps;
-
35 int voc_size, embed_dim, padding_idx, hidden_dim, num_heads;
-
36 std::vector<Fp32llamaDecoderLayer> layers;
-
37 std::string profile_name = "Fp32llamaDecoder";
-
38
-
39 private:
-
40 float* attention_mask_buf;
-
41 float* pos_embeds_buf;
-
42 float* last_hidden_states_buf;
-
43 float* hidden_states_buf;
-
44};
+
43
+
+ +
45 public:
+
46 Fp32llamaDecoder(std::string param_path, const struct model_config config);
+ +
48 Matrix3D<float> prepare_decoder_attention_mask(int length, int past_length);
+
49 struct Fp32llamaDecoder_output forward(const struct Fp32llamaDecoder_input& input);
+
50 Embedding embed_tokens;
+
51 LlamaRMSNorm norm;
+
52 float rms_norm_eps;
+
53 int voc_size, embed_dim, padding_idx, hidden_dim, num_heads;
+
54 std::vector<Fp32llamaDecoderLayer> layers;
+
55 std::string profile_name = "Fp32llamaDecoder";
+
56
+
57 private:
+
58 float* attention_mask_buf;
+
59 float* pos_embeds_buf;
+
60 float* last_hidden_states_buf;
+
61 float* hidden_states_buf;
+
62 float* inputs_embeds_buf;
+
63 float* first_input_ids_buf;
+
64 float* image_embed_buf;
+
65 float* second_input_ids_buf;
+
66};
Definition Embedding.h:5
-
Definition Fp32llamaDecoder.h:26
+
Definition Fp32llamaDecoder.h:44
Definition LlamaRMSNorm.h:4
Definition common.h:34
Definition Fp32llamaDecoder.h:13
diff --git a/Fp32llamaForCausalLM_8h_source.html b/Fp32llamaForCausalLM_8h_source.html index c62b276d..75660ba1 100644 --- a/Fp32llamaForCausalLM_8h_source.html +++ b/Fp32llamaForCausalLM_8h_source.html @@ -94,36 +94,53 @@
8 Matrix3D<int> input_ids;
-
9 std::vector<Matrix3D<float>> past_keys, past_values;
-
10 bool has_past_keys_values;
-
11
- -
13 Fp32LlamaForCausalLM_input(Matrix3D<int> input_ids_) : input_ids(input_ids_) { has_past_keys_values = false; }
-
14 Fp32LlamaForCausalLM_input(Matrix3D<int> input_ids_, std::vector<Matrix3D<float>> past_keys_,
-
15 std::vector<Matrix3D<float>> past_values_)
-
16 : input_ids(input_ids_), past_keys(past_keys_), past_values(past_values_) {
-
17 has_past_keys_values = true;
-
18 }
-
19};
+
9 Matrix3D<float> image_embed;
+
10 Matrix3D<int> second_input_ids;
+
11 std::vector<Matrix3D<float>> past_keys, past_values;
+
12 bool has_past_keys_values;
+
13 bool is_llava;
+
14
+ +
16 Fp32LlamaForCausalLM_input(Matrix3D<int> input_ids_) : input_ids(input_ids_) {
+
17 has_past_keys_values = false;
+
18 is_llava = false;
+
19 }
+
20 Fp32LlamaForCausalLM_input(Matrix3D<int> input_ids_, std::vector<Matrix3D<float>> past_keys_,
+
21 std::vector<Matrix3D<float>> past_values_)
+
22 : input_ids(input_ids_), past_keys(past_keys_), past_values(past_values_) {
+
23 has_past_keys_values = true;
+
24 is_llava = false;
+
25 }
+
26 Fp32LlamaForCausalLM_input(Matrix3D<int> input_ids_, Matrix3D<float> image_embed_, Matrix3D<int> second_input_ids_)
+
27 : input_ids(input_ids_), image_embed(image_embed_), second_input_ids(second_input_ids_) {
+
28 has_past_keys_values = false;
+
29 is_llava = true;
+
30 }
+ +
32 : input_ids(input_ids_), image_embed(image_embed_) {
+
33 has_past_keys_values = false;
+
34 is_llava = true;
+
35 }
+
36};
-
20
-
- -
22 public:
-
23 Fp32LlamaForCausalLM(std::string param_path, const struct model_config config);
-
24
-
25 struct Fp32LlamaForCausalLM_output forward(const struct Fp32LlamaForCausalLM_input& input);
-
26
-
27 private:
-
28 Fp32llamaDecoder decoder;
-
29 Linear_FP lm_head;
-
30 std::string profile_name = "Fp32LlamaForCausalLM";
-
31 float* logits_output;
-
32 float* lm_head_weight;
-
33};
+
37
+
+ +
39 public:
+
40 Fp32LlamaForCausalLM(std::string param_path, const struct model_config config);
+
41
+
42 struct Fp32LlamaForCausalLM_output forward(const struct Fp32LlamaForCausalLM_input& input);
+
43
+
44 private:
+
45 Fp32llamaDecoder decoder;
+
46 Linear_FP lm_head;
+
47 std::string profile_name = "Fp32LlamaForCausalLM";
+
48 float* logits_output;
+
49 float* lm_head_weight;
+
50};
-
Definition Fp32llamaForCausalLM.h:21
-
Definition Fp32llamaDecoder.h:26
+
Definition Fp32llamaForCausalLM.h:38
+
Definition Fp32llamaDecoder.h:44
Definition linear.h:6
Definition common.h:34
Definition Fp32llamaForCausalLM.h:7
diff --git a/Gelu_8h_source.html b/Gelu_8h_source.html new file mode 100644 index 00000000..42a70810 --- /dev/null +++ b/Gelu_8h_source.html @@ -0,0 +1,99 @@ + + + + + + + +TinyChatEngine: llm/include/ops/Gelu.h Source File + + + + + + + + + + + +
+
+ + + + + + +
+
TinyChatEngine +
+
+
+ + + + + + + + + +
+
+ + +
+
+
+
+
+
Loading...
+
Searching...
+
No Matches
+
+
+
+
+ + +
+
+
Gelu.h
+
+
+
1#include "common.h"
+
2
+
3float Gelu_imp(float x);
+
4void Gelu(Matrix3D<float> a);
+
5float Gelu_quick_imp(float x);
+
6void Gelu_quick(Matrix3D<float> a);
+
Definition common.h:34
+
+ + + + diff --git a/Generate_8h_source.html b/Generate_8h_source.html index 90a072a9..9f8d74d8 100644 --- a/Generate_8h_source.html +++ b/Generate_8h_source.html @@ -109,100 +109,105 @@
24#include "Int4OPTForCausalLM.h"
25#include "Int4llamaForCausalLM.h"
26#include "Int4GPTBigCodeForCausalLM.h"
-
27#include "OPTForCausalLM.h"
-
28#include "OPTTokenizer.h"
-
29#include "operators.h"
-
30#include "utils.h"
-
31
-
32// inline std::mt19937 OPT_rng; // inline variables are only available with ‘-std=c++17’ or ‘-std=gnu++17’
-
33static std::mt19937 OPT_rng;
-
34
-
-
35typedef struct OPT_token_data {
-
36 int id; // token id
-
37 float logit; // log-odds of the token
-
38 float p; // probability of the token
- +
27#include "Fp32CLIPVisionTransformer.h"
+
28#include "OPTForCausalLM.h"
+
29#include "OPTTokenizer.h"
+
30#include "operators.h"
+
31#include "utils.h"
+
32
+
33// inline std::mt19937 OPT_rng; // inline variables are only available with ‘-std=c++17’ or ‘-std=gnu++17’
+
34static std::mt19937 OPT_rng;
+
35
+
+
36typedef struct OPT_token_data {
+
37 int id; // token id
+
38 float logit; // log-odds of the token
+
39 float p; // probability of the token
+
-
40
-
-
41typedef struct OPT_token_data_array {
-
42 OPT_token_data* data;
-
43 size_t size;
-
44 bool sorted;
- +
41
+
+
42typedef struct OPT_token_data_array {
+
43 OPT_token_data* data;
+
44 size_t size;
+
45 bool sorted;
+
-
46
-
-
47struct opt_params {
-
48 int32_t seed = -1; // RNG seed
-
49 int32_t n_threads = 1; // TODO: fix this
-
50 int32_t n_predict = 128; // new tokens to predict
-
51 int32_t n_parts = -1; // amount of model parts (-1 = determine from model dimensions)
-
52 int32_t n_ctx = 512; // context size
-
53 int32_t n_batch = 512; // batch size for prompt processing (must be >=32 to use BLAS)
-
54 int32_t n_keep = 0; // number of tokens to keep from initial prompt
-
55 int32_t n_vocab = 50272; // vocabulary size
-
56
-
57 // sampling parameters
-
58 std::unordered_map<int, float> logit_bias; // logit bias for specific tokens
-
59 int32_t top_k = 40; // <= 0 to use vocab size
-
60 float top_p = 0.95f; // 1.0 = disabled
-
61 float tfs_z = 1.00f; // 1.0 = disabled
-
62 float typical_p = 1.00f; // 1.0 = disabled
-
63 float temp = 0.80f; // 1.0 = disabled
-
64 float repeat_penalty = 1.10f; // 1.0 = disabled
-
65 int32_t repeat_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size)
-
66 float frequency_penalty = 0.00f; // 0.0 = disabled
-
67 float presence_penalty = 0.00f; // 0.0 = disabled
-
68 int mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
-
69 float mirostat_tau = 5.00f; // target entropy
-
70 float mirostat_eta = 0.10f; // learning rate
-
71};
+
47
+
+
48struct opt_params {
+
49 int32_t seed = -1; // RNG seed
+
50 int32_t n_threads = 1; // TODO: fix this
+
51 int32_t n_predict = 128; // new tokens to predict
+
52 int32_t n_parts = -1; // amount of model parts (-1 = determine from model dimensions)
+
53 int32_t n_ctx = 512; // context size
+
54 int32_t n_batch = 512; // batch size for prompt processing (must be >=32 to use BLAS)
+
55 int32_t n_keep = 0; // number of tokens to keep from initial prompt
+
56 int32_t n_vocab = 50272; // vocabulary size
+
57
+
58 // sampling parameters
+
59 std::unordered_map<int, float> logit_bias; // logit bias for specific tokens
+
60 int32_t top_k = 40; // <= 0 to use vocab size
+
61 float top_p = 0.95f; // 1.0 = disabled
+
62 float tfs_z = 1.00f; // 1.0 = disabled
+
63 float typical_p = 1.00f; // 1.0 = disabled
+
64 float temp = 0.80f; // 1.0 = disabled
+
65 float repeat_penalty = 1.10f; // 1.0 = disabled
+
66 int32_t repeat_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size)
+
67 float frequency_penalty = 0.00f; // 0.0 = disabled
+
68 float presence_penalty = 0.00f; // 0.0 = disabled
+
69 int mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
+
70 float mirostat_tau = 5.00f; // target entropy
+
71 float mirostat_eta = 0.10f; // learning rate
+
72};
-
72
-
73void sample_repetition_penalty(OPT_token_data_array* candidates, const int* last_tokens, size_t last_tokens_size,
-
74 float penalty);
-
75
-
76void sample_frequency_and_presence_penalties(OPT_token_data_array* candidates, const int* last_tokens_p,
-
77 size_t last_tokens_size, float alpha_frequency, float alpha_presence);
-
78
-
79int sample_token_greedy(OPT_token_data_array* candidates);
-
80
-
81void sample_temperature(OPT_token_data_array* candidates_p, float temp);
-
82
-
83void sample_softmax(OPT_token_data_array* candidates);
-
84
-
85int sample_token(OPT_token_data_array* candidates);
-
86
-
87void sample_top_k(OPT_token_data_array* candidates, int k, size_t min_keep);
-
88
-
89int sample_token_mirostat(const int n_vocab, OPT_token_data_array* candidates, float tau, float eta, int m, float* mu);
-
90
-
91int sample_token_mirostat_v2(OPT_token_data_array* candidates, float tau, float eta, float* mu);
-
92
-
93void sample_tail_free(OPT_token_data_array* candidates, float z, size_t min_keep);
-
94
-
95void sample_typical(OPT_token_data_array* candidates, float p, size_t min_keep);
-
96
-
97void sample_top_p(OPT_token_data_array* candidates, float p, size_t min_keep);
-
98
-
99std::vector<int> OPTGenerate(void* model, int model_type, std::vector<int> input_ids,
-
100 const struct opt_params generation_config, Encoder* encoder = NULL,
-
101 bool interactive = false, bool voicechat = false);
-
102
-
103enum { OPT_INT8, LLaMA_FP32, LLaMA_INT4, OPT_FP32, OPT_INT4, StarCoder_FP32, StarCoder_INT4 };
-
104std::string LLaMAGenerate(std::string param_path, void* model, int model_type, std::string text, const struct opt_params generation_config,
-
105 std::string voc_path, bool interactive, bool voicechat);
-
106
-
107std::string GPTBigCodeGenerate(std::string param_path, void *model_ptr, int model_type, std::string text, const struct opt_params generation_config,
-
108 std::string voc_path, bool interactive);
-
109
-
110#endif // GENERATE_H
+
73
+
74void sample_repetition_penalty(OPT_token_data_array* candidates, const int* last_tokens, size_t last_tokens_size,
+
75 float penalty);
+
76
+
77void sample_frequency_and_presence_penalties(OPT_token_data_array* candidates, const int* last_tokens_p,
+
78 size_t last_tokens_size, float alpha_frequency, float alpha_presence);
+
79
+
80int sample_token_greedy(OPT_token_data_array* candidates);
+
81
+
82void sample_temperature(OPT_token_data_array* candidates_p, float temp);
+
83
+
84void sample_softmax(OPT_token_data_array* candidates);
+
85
+
86int sample_token(OPT_token_data_array* candidates);
+
87
+
88void sample_top_k(OPT_token_data_array* candidates, int k, size_t min_keep);
+
89
+
90int sample_token_mirostat(const int n_vocab, OPT_token_data_array* candidates, float tau, float eta, int m, float* mu);
+
91
+
92int sample_token_mirostat_v2(OPT_token_data_array* candidates, float tau, float eta, float* mu);
+
93
+
94void sample_tail_free(OPT_token_data_array* candidates, float z, size_t min_keep);
+
95
+
96void sample_typical(OPT_token_data_array* candidates, float p, size_t min_keep);
+
97
+
98void sample_top_p(OPT_token_data_array* candidates, float p, size_t min_keep);
+
99
+
100std::vector<int> OPTGenerate(void* model, int model_type, std::vector<int> input_ids,
+
101 const struct opt_params generation_config, Encoder* encoder = NULL,
+
102 bool interactive = false, bool voicechat = false);
+
103
+
104enum { OPT_INT8, LLaMA_FP32, LLaMA_INT4, OPT_FP32, OPT_INT4, StarCoder_FP32, StarCoder_INT4, LLaVA_FP32, LLaVA_INT4 };
+
105std::string LLaMAGenerate(std::string param_path, void* model, int model_type, std::string text, const struct opt_params generation_config,
+
106 std::string voc_path, bool interactive, bool voicechat);
+
107
+
108std::string GPTBigCodeGenerate(std::string param_path, void *model_ptr, int model_type, std::string text, const struct opt_params generation_config,
+
109 std::string voc_path, bool interactive);
+
110
+
111std::string LLaVAGenerate(std::string llama_param_path, void* llama_model_ptr, std::string clip_param_path, void* clip_model_ptr, int model_type,
+
112 std::string text, std::string img_path, const struct opt_params generation_config, std::string voc_path, bool interactive,
+
113 bool voicechat);
+
114
+
115#endif // GENERATE_H
Definition OPTTokenizer.h:35
-
Definition Generate.h:41
-
Definition Generate.h:35
-
Definition Generate.h:47
+
Definition Generate.h:42
+
Definition Generate.h:36
+
Definition Generate.h:48
diff --git a/classInt4llamaDecoder.html b/classInt4llamaDecoder.html index efd4e70b..5ee7f51b 100644 --- a/classInt4llamaDecoder.html +++ b/classInt4llamaDecoder.html @@ -135,6 +135,18 @@ floathidden_states_buf   + +floatinputs_embeds_buf +  + +floatfirst_input_ids_buf +  + +floatimage_embed_buf +  + +floatsecond_input_ids_buf floatnorm_weight_ptr = nullptr   diff --git a/classMatrix4D-members.html b/classMatrix4D-members.html new file mode 100644 index 00000000..4aa6131b --- /dev/null +++ b/classMatrix4D-members.html @@ -0,0 +1,101 @@ + + + + + + + +TinyChatEngine: Member List + + + + + + + + + + + +
+
+ + + + + + +
+
TinyChatEngine +
+
+
+ + + + + + + + +
+
+ + +
+
+
+
+
+
Loading...
+
Searching...
+
No Matches
+
+
+
+
+ +
+
+
Matrix4D< T > Member List
+
+
+ +

This is the complete list of members for Matrix4D< T >, including all inherited members.

+ + + + + + + + + + + + + + + + +
length() const (defined in Matrix4D< T >)Matrix4D< T >inline
load(const char *path) (defined in Matrix4D< T >)Matrix4D< T >inline
m_data (defined in Matrix4D< T >)Matrix4D< T >
m_dim_w (defined in Matrix4D< T >)Matrix4D< T >
m_dim_x (defined in Matrix4D< T >)Matrix4D< T >
m_dim_y (defined in Matrix4D< T >)Matrix4D< T >
m_dim_z (defined in Matrix4D< T >)Matrix4D< T >
Matrix4D(T *data, int dim_w, int dim_x, int dim_y, int dim_z) (defined in Matrix4D< T >)Matrix4D< T >inline
Matrix4D() (defined in Matrix4D< T >)Matrix4D< T >inline
operator()(int w, int x, int y, int z) (defined in Matrix4D< T >)Matrix4D< T >inline
operator()(int w, int x, int y, int z) const (defined in Matrix4D< T >)Matrix4D< T >inline
operator==(const Matrix4D< T > &other) const (defined in Matrix4D< T >)Matrix4D< T >inline
sum() const (defined in Matrix4D< T >)Matrix4D< T >inline
sum(int size) const (defined in Matrix4D< T >)Matrix4D< T >inline
sum(int size, int start_idx) const (defined in Matrix4D< T >)Matrix4D< T >inline
+ + + + diff --git a/classMatrix4D.html b/classMatrix4D.html new file mode 100644 index 00000000..151d5a63 --- /dev/null +++ b/classMatrix4D.html @@ -0,0 +1,139 @@ + + + + + + + +TinyChatEngine: Matrix4D< T > Class Template Reference + + + + + + + + + + + +
+
+ + + + + + +
+
TinyChatEngine +
+
+
+ + + + + + + + +
+
+ + +
+
+
+
+
+
Loading...
+
Searching...
+
No Matches
+
+
+
+
+ +
+
+ +
Matrix4D< T > Class Template Reference
+
+
+ + + + + + + + + + + + + + + + + + + + +

+Public Member Functions

Matrix4D (T *data, int dim_w, int dim_x, int dim_y, int dim_z)
 
+Toperator() (int w, int x, int y, int z)
 
+const Toperator() (int w, int x, int y, int z) const
 
+bool operator== (const Matrix4D< T > &other) const
 
+int length () const
 
+T sum () const
 
+T sum (int size) const
 
+T sum (int size, int start_idx) const
 
+void load (const char *path)
 
+ + + + + + + + + + + +

+Public Attributes

+Tm_data
 
+int m_dim_w
 
+int m_dim_x
 
+int m_dim_y
 
+int m_dim_z
 
+
The documentation for this class was generated from the following file: +
+ + + + diff --git a/classes.html b/classes.html index b0c98b9a..b46f34ca 100644 --- a/classes.html +++ b/classes.html @@ -84,22 +84,22 @@
BMM_F32T
BMM_S8T_S8N_F32T
BMM_S8T_S8N_F32T_params
BMM_S8T_S8N_S8T
BMM_S8T_S8N_S8T_params
C
-
llama_sp_bigram::comparator
+
clip_image_f32
clip_image_u8
clip_model_config
llama_sp_bigram::comparator
Conv2D
Conv2D_params
E
Embedding
Encoder
F
-
Fp32GPTBigCodeAttention
Fp32GPTBigCodeAttention_input
Fp32GPTBigCodeAttention_output
Fp32GPTBigCodeDecoder
Fp32GPTBigCodeDecoder_input
Fp32GPTBigCodeDecoder_output
Fp32GPTBigCodeDecoderLayer
Fp32GPTBigCodeDecoderLayer_input
Fp32GPTBigCodeDecoderLayer_output
Fp32GPTBigCodeForCausalLM
Fp32GPTBigCodeForCausalLM_input
Fp32GPTBigCodeForCausalLM_output
Fp32llamaAttention
Fp32llamaAttention_input
Fp32llamaAttention_output
Fp32llamaDecoder
Fp32llamaDecoder_input
Fp32llamaDecoder_output
Fp32llamaDecoderLayer
Fp32llamaDecoderLayer_input
Fp32llamaDecoderLayer_output
Fp32LlamaForCausalLM
Fp32LlamaForCausalLM_input
Fp32LlamaForCausalLM_output
Fp32OPTAttention
Fp32OPTAttention_input
Fp32OPTAttention_output
Fp32OPTDecoder
Fp32OPTDecoder_input
Fp32OPTDecoder_output
Fp32OPTDecoderLayer
Fp32OPTDecoderLayer_input
Fp32OPTDecoderLayer_output
Fp32OPTForCausalLM
Fp32OPTForCausalLM_input
Fp32OPTForCausalLM_output
+
Fp32CLIPAttention
Fp32CLIPAttention_input
Fp32CLIPAttention_output
Fp32CLIPEncoder
Fp32CLIPEncoder_input
Fp32CLIPEncoder_output
Fp32CLIPEncoderLayer
Fp32CLIPEncoderLayer_input
Fp32CLIPEncoderLayer_output
Fp32CLIPVisionTransformer
Fp32CLIPVisionTransformer_input
Fp32CLIPVisionTransformer_output
Fp32GPTBigCodeAttention
Fp32GPTBigCodeAttention_input
Fp32GPTBigCodeAttention_output
Fp32GPTBigCodeDecoder
Fp32GPTBigCodeDecoder_input
Fp32GPTBigCodeDecoder_output
Fp32GPTBigCodeDecoderLayer
Fp32GPTBigCodeDecoderLayer_input
Fp32GPTBigCodeDecoderLayer_output
Fp32GPTBigCodeForCausalLM
Fp32GPTBigCodeForCausalLM_input
Fp32GPTBigCodeForCausalLM_output
Fp32llamaAttention
Fp32llamaAttention_input
Fp32llamaAttention_output
Fp32llamaDecoder
Fp32llamaDecoder_input
Fp32llamaDecoder_output
Fp32llamaDecoderLayer
Fp32llamaDecoderLayer_input
Fp32llamaDecoderLayer_output
Fp32LlamaForCausalLM
Fp32LlamaForCausalLM_input
Fp32LlamaForCausalLM_output
Fp32OPTAttention
Fp32OPTAttention_input
Fp32OPTAttention_output
Fp32OPTDecoder
Fp32OPTDecoder_input
Fp32OPTDecoder_output
Fp32OPTDecoderLayer
Fp32OPTDecoderLayer_input
Fp32OPTDecoderLayer_output
Fp32OPTForCausalLM
Fp32OPTForCausalLM_input
Fp32OPTForCausalLM_output
I
int4_thread_args
Int4GPTBigCodeAttention
Int4GPTBigCodeAttention_input
Int4GPTBigCodeAttention_output
Int4GPTBigCodeDecoder
Int4GPTBigCodeDecoder_input
Int4GPTBigCodeDecoder_output
Int4GPTBigCodeDecoderLayer
Int4GPTBigCodeDecoderLayer_input
Int4GPTBigCodeDecoderLayer_output
Int4GPTBigCodeForCausalLM
Int4GPTBigCodeForCausalLM_input
Int4GPTBigCodeForCausalLM_output
Int4llamaAttention
Int4llamaAttention_input
Int4llamaAttention_output
Int4llamaDecoder
Int4llamaDecoder_input
Int4llamaDecoder_output
Int4llamaDecoderLayer
Int4llamaDecoderLayer_input
Int4llamaDecoderLayer_output
Int4LlamaForCausalLM
Int4LlamaForCausalLM_input
Int4LlamaForCausalLM_output
Int4OPTAttention
Int4OPTAttention_input
Int4OPTAttention_output
Int4OPTDecoder
Int4OPTDecoder_input
Int4OPTDecoder_output
Int4OPTDecoderLayer
Int4OPTDecoderLayer_input
Int4OPTDecoderLayer_output
Int4OPTForCausalLM
Int4OPTForCausalLM_input
Int4OPTForCausalLM_output
Int8OPTAttention
Int8OPTAttention_input
Int8OPTAttention_output
Int8OPTDecoder
Int8OPTDecoder_input
Int8OPTDecoder_output
Int8OPTDecoderLayer
Int8OPTDecoderLayer_input
Int8OPTDecoderLayer_output
L
-
LayerNorm
LayerNorm_params
LayerNormQ
LayerNormQ_params
Linear_FP
Linear_FP_int4
llama_file
llama_sp_bigram
llama_sp_symbol
llama_tokenizer
llama_vocab
LlamaRMSNorm
+
LayerNorm
LayerNorm_params
LayerNormQ
LayerNormQ_params
Linear_FP
Linear_FP_int4
llama_file
llama_sp_bigram
llama_sp_symbol
llama_tokenizer
llama_vocab
LlamaRMSNorm
llava_image_embed
M
-
matmul_params
MatmulOperator (matmul)
matrix
Matrix3D
max_error_info
MetalMatmulBuffers
MetalMatmulInt4
MetalMatmulInt4IMP
MetalMatMulParams
model_config
+
matmul_params
MatmulOperator (matmul)
matrix
Matrix3D
Matrix4D
max_error_info
MetalMatmulBuffers
MetalMatmulInt4
MetalMatmulInt4IMP
MetalMatMulParams
model_config
O
opt_params
OPT_token_data
OPT_token_data_array
OPTForCausalLM
OPTForCausalLM_input
OPTForCausalLM_output
optimization_params
@@ -114,7 +114,7 @@
RotaryPosEmb
S
-
starcoder_vocab
+
starcoder_vocab
stbi_io_callbacks
T
thread_args
llama_vocab::token_score
transpose_1_2idx_arg
transpose_1_2idx_float_arg
diff --git a/common_8h_source.html b/common_8h_source.html index b6b65383..d7f40c6d 100644 --- a/common_8h_source.html +++ b/common_8h_source.html @@ -216,13 +216,116 @@
126};
127
-
128static inline void debug_info(std::string s) {
-
129#ifdef DEBUG
-
130 std::cout << s << std::endl;
-
131#endif
-
132}
-
133#endif
+
128template <typename T>
+
+
129class Matrix4D {
+
130 public:
+
131 Matrix4D(T *data, int dim_w, int dim_x, int dim_y, int dim_z) :
+
132 m_data(data), m_dim_w(dim_w), m_dim_x(dim_x), m_dim_y(dim_y), m_dim_z(dim_z) {}
+
133
+
134#if defined(__CUDACC__)
+
135 __host__ __device__ T &operator()(int w, int x, int y, int z) {
+
136 return m_data[w * m_dim_x * m_dim_y * m_dim_z + x * m_dim_y * m_dim_z + y * m_dim_z + z];
+
137 }
+
138
+
139 __host__ __device__ const T &operator()(int w, int x, int y, int z) const {
+
140 return m_data[w * m_dim_x * m_dim_y * m_dim_z + x * m_dim_y * m_dim_z + y * m_dim_z + z];
+
141 }
+
142#else
+
143 T &operator()(int w, int x, int y, int z) {
+
144 if (w < 0 || w >= m_dim_w || x < 0 || x >= m_dim_x || y < 0 || y >= m_dim_y || z < 0 || z >= m_dim_z) {
+
145 printf("%d, %d, %d, %d\n", w, x, y, z);
+
146 printf("%d, %d, %d, %d\n", m_dim_w, m_dim_x, m_dim_y, m_dim_z);
+
147 throw std::out_of_range("Matrix4D: Indices out of range.");
+
148 }
+
149 return m_data[w * m_dim_x * m_dim_y * m_dim_z + x * m_dim_y * m_dim_z + y * m_dim_z + z];
+
150 }
+
151
+
152 const T &operator()(int w, int x, int y, int z) const {
+
153 if (w < 0 || w >= m_dim_w || x < 0 || x >= m_dim_x || y < 0 || y >= m_dim_y || z < 0 || z >= m_dim_z) {
+
154 printf("%d, %d, %d, %d\n", w, x, y, z);
+
155 printf("%d, %d, %d, %d\n", m_dim_w, m_dim_x, m_dim_y, m_dim_z);
+
156 throw std::out_of_range("Matrix4D: Indices out of range.");
+
157 }
+
158 return m_data[w * m_dim_x * m_dim_y * m_dim_z + x * m_dim_y * m_dim_z + y * m_dim_z + z];
+
159 }
+
160#endif
+
161
+
162 bool operator==(const Matrix4D<T> &other) const {
+
163 if (m_dim_w != other.m_dim_w || m_dim_x != other.m_dim_x || m_dim_y != other.m_dim_y || m_dim_z != other.m_dim_z) {
+
164 return false;
+
165 }
+
166
+
167 for (int w = 0; w < m_dim_w; ++w) {
+
168 for (int x = 0; x < m_dim_x; ++x) {
+
169 for (int y = 0; y < m_dim_y; ++y) {
+
170 for (int z = 0; z < m_dim_z; ++z) {
+
171 if ((*this)(w, x, y, z) != other(w, x, y, z)) {
+
172 return false;
+
173 }
+
174 }
+
175 }
+
176 }
+
177 }
+
178
+
179 return true;
+
180 }
+
181
+
182#if defined(__CUDACC__)
+
183 __host__ __device__ int length() const { return m_dim_w * m_dim_x * m_dim_y * m_dim_z; }
+
184#else
+
185 int length() const { return m_dim_w * m_dim_x * m_dim_y * m_dim_z; }
+
186#endif
+
187
+
188 T sum() const {
+
189 T sum = 0;
+
190 for (int i = 0; i < this->length(); i++) {
+
191 sum += this->m_data[i];
+
192 }
+
193 return sum;
+
194 }
+
195 T sum(int size) const {
+
196 T sum = 0;
+
197 for (int i = 0; i < size; i++) {
+
198 sum += this->m_data[i];
+
199 }
+
200 return sum;
+
201 }
+
202
+
203 T sum(int size, int start_idx) const {
+
204 T sum = 0;
+
205 for (int i = 0; i < size; i++) {
+
206 sum += this->m_data[start_idx + i];
+
207 }
+
208 return sum;
+
209 }
+
210
+
211 void load(const char *path) {
+
212 std::ifstream infile(path, std::ios::binary | std::ios::in);
+
213 if (infile.fail()) {
+
214 std::cout << strerror(errno) << ": " << path << std::endl;
+
215 throw("Expected error...");
+
216 } else {
+
217 infile.read(reinterpret_cast<char *>(this->m_data), this->length() * sizeof(T));
+
218 infile.close();
+
219 }
+
220 }
+
221 T *m_data;
+
222 int m_dim_w, m_dim_x, m_dim_y, m_dim_z;
+
223
+
224 // Default constructor
+
225 Matrix4D() { m_data = NULL; }
+
226};
+
+
227
+
228static inline void debug_info(std::string s) {
+
229#ifdef DEBUG
+
230 std::cout << s << std::endl;
+
231#endif
+
232}
+
233#endif
Definition common.h:34
+
Definition common.h:129
Definition common.h:23
Definition common.h:28
diff --git a/dir_1a52ab83798783f2e1c21e44145c68ba.html b/dir_1a52ab83798783f2e1c21e44145c68ba.html index c19e9337..a4274659 100644 --- a/dir_1a52ab83798783f2e1c21e44145c68ba.html +++ b/dir_1a52ab83798783f2e1c21e44145c68ba.html @@ -89,8 +89,12 @@    BMM_S8T_S8N_S8T.h   + Conv2D.h Embedding.h   + Gelu.h LayerNorm.h    LayerNormQ.h diff --git a/dir_29e46f7556369f4f86197d2d4903a8fc.html b/dir_29e46f7556369f4f86197d2d4903a8fc.html index b02a5b3b..1a3e7205 100644 --- a/dir_29e46f7556369f4f86197d2d4903a8fc.html +++ b/dir_29e46f7556369f4f86197d2d4903a8fc.html @@ -81,6 +81,14 @@ + + + + + + + + diff --git a/dir_6a23dd2f4b74efa88df1553988330e41.html b/dir_6a23dd2f4b74efa88df1553988330e41.html index bae7dafa..2627ff29 100644 --- a/dir_6a23dd2f4b74efa88df1553988330e41.html +++ b/dir_6a23dd2f4b74efa88df1553988330e41.html @@ -104,6 +104,8 @@ + +

Files

 Fp32CLIPAttention.h
 
 Fp32CLIPEncoder.h
 
 Fp32CLIPEncoderLayer.h
 
 Fp32CLIPVisionTransformer.h
 
 Fp32GPTBigCodeAttention.h
 
 Fp32GPTBigCodeDecoder.h
 
 profiler.h
 
 stb_image.h
 
 utils.h
 
diff --git a/doxygen_crawl.html b/doxygen_crawl.html index 60fadd6d..04e731bb 100644 --- a/doxygen_crawl.html +++ b/doxygen_crawl.html @@ -18,6 +18,10 @@ + + + + @@ -51,7 +55,9 @@ + + @@ -62,6 +68,7 @@ + @@ -76,10 +83,44 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -272,12 +313,16 @@ + + + + @@ -322,6 +367,8 @@ + + diff --git a/files.html b/files.html index f99d50ed..92c0ebfc 100644 --- a/files.html +++ b/files.html @@ -87,48 +87,54 @@   llm   include   nn_modules - Fp32GPTBigCodeAttention.h - Fp32GPTBigCodeDecoder.h - Fp32GPTBigCodeDecoderLayer.h - Fp32GPTBigCodeForCausalLM.h - Fp32llamaAttention.h - Fp32llamaDecoder.h - Fp32llamaDecoderLayer.h - Fp32llamaForCausalLM.h - Fp32OPTAttention.h - Fp32OPTDecoder.h - Fp32OPTDecoderLayer.h - Fp32OPTForCausalLM.h - Int4GPTBigCodeAttention.h - Int4GPTBigCodeDecoder.h - Int4GPTBigCodeDecoderLayer.h - Int4GPTBigCodeForCausalLM.h - Int4llamaAttention.h - Int4llamaDecoder.h - Int4llamaDecoderLayer.h - Int4llamaForCausalLM.h - Int4OPTAttention.h - Int4OPTDecoder.h - Int4OPTDecoderLayer.h - Int4OPTForCausalLM.h - Int8OPTAttention.h - Int8OPTDecoder.h - Int8OPTDecoderLayer.h - OPTForCausalLM.h + Fp32CLIPAttention.h + Fp32CLIPEncoder.h + Fp32CLIPEncoderLayer.h + Fp32CLIPVisionTransformer.h + Fp32GPTBigCodeAttention.h + Fp32GPTBigCodeDecoder.h + Fp32GPTBigCodeDecoderLayer.h + Fp32GPTBigCodeForCausalLM.h + Fp32llamaAttention.h + Fp32llamaDecoder.h + Fp32llamaDecoderLayer.h + Fp32llamaForCausalLM.h + Fp32OPTAttention.h + Fp32OPTDecoder.h + Fp32OPTDecoderLayer.h + Fp32OPTForCausalLM.h + Int4GPTBigCodeAttention.h + Int4GPTBigCodeDecoder.h + Int4GPTBigCodeDecoderLayer.h + Int4GPTBigCodeForCausalLM.h + Int4llamaAttention.h + Int4llamaDecoder.h + Int4llamaDecoderLayer.h + Int4llamaForCausalLM.h + Int4OPTAttention.h + Int4OPTDecoder.h + Int4OPTDecoderLayer.h + Int4OPTForCausalLM.h + Int8OPTAttention.h + Int8OPTDecoder.h + Int8OPTDecoderLayer.h + OPTForCausalLM.h   ops  arg_max.h  BMM_F32T.h  BMM_S8T_S8N_F32T.h  
BMM_S8T_S8N_S8T.h - Embedding.h - LayerNorm.h - LayerNormQ.h - linear.h - LlamaRMSNorm.h - RotaryPosEmb.h - W8A8B8O8Linear.h - W8A8B8O8LinearReLU.h - W8A8BFP32OFP32Linear.h + Conv2D.h + Embedding.h + Gelu.h + LayerNorm.h + LayerNormQ.h + linear.h + LlamaRMSNorm.h + RotaryPosEmb.h + W8A8B8O8Linear.h + W8A8B8O8LinearReLU.h + W8A8BFP32OFP32Linear.h  common.h  Generate.h  GPTBigCodeTokenizer.h @@ -137,7 +143,8 @@  operators.h  OPTTokenizer.h  profiler.h - utils.h + stb_image.h + utils.h diff --git a/matmul_8h_source.html b/matmul_8h_source.html index 44644372..9478e6dc 100644 --- a/matmul_8h_source.html +++ b/matmul_8h_source.html @@ -243,7 +243,7 @@
Definition matmul.h:106
Definition matmul.h:78
Definition matmul.h:59
-
Definition Generate.h:47
+
Definition Generate.h:48
Definition matmul.h:73
Definition matmul.h:52
Definition matmul.h:94
diff --git a/model_8h_source.html b/model_8h_source.html index 954652fc..5b9327b8 100644 --- a/model_8h_source.html +++ b/model_8h_source.html @@ -98,69 +98,98 @@
12 int vocsize;
13 int padding_idx;
14 float rms_norm_eps; // RMSNorm epsilon (only for LLaMA models)
-
15
-
16 model_config() : model_config(1, 32, 32, 2048, 4096, 11008, 32000, 1, 1e-6) {}
-
17 model_config(int batch, int num_heads, int num_layers, int max_sqlen, int embed_dim, int hidden_dim, int vocsize,
-
18 int padding_idx, float rms_norm_eps)
-
19 : batch(batch),
-
20 num_heads(num_heads),
-
21 num_layers(num_layers),
-
22 max_sqlen(max_sqlen),
-
23 embed_dim(embed_dim),
-
24 hidden_dim(hidden_dim),
-
25 vocsize(vocsize),
-
26 padding_idx(padding_idx),
-
27 rms_norm_eps(rms_norm_eps) {}
-
28};
+
15 // Below are for Clip models
+
16 int image_size;
+
17 int patch_size;
+
18 int projection_dim;
+
19 int mmproj_dim;
+
20
+
21 model_config() : model_config(1, 32, 32, 2048, 4096, 11008, 32000, 1, 1e-6, 0, 0, 0, 0) {}
+
22 model_config(int batch, int num_heads, int num_layers, int max_sqlen, int embed_dim, int hidden_dim, int vocsize,
+
23 int padding_idx, float rms_norm_eps)
+
24 : batch(batch),
+
25 num_heads(num_heads),
+
26 num_layers(num_layers),
+
27 max_sqlen(max_sqlen),
+
28 embed_dim(embed_dim),
+
29 hidden_dim(hidden_dim),
+
30 vocsize(vocsize),
+
31 padding_idx(padding_idx),
+
32 rms_norm_eps(rms_norm_eps) {}
+
33 // Clip models
+
34 model_config(int batch, int num_heads, int num_layers, int max_sqlen, int embed_dim, int hidden_dim, int vocsize,
+
35 int padding_idx, float rms_norm_eps, int image_size, int patch_size, int projection_dim, int mmproj_dim)
+
36 : batch(batch),
+
37 num_heads(num_heads),
+
38 num_layers(num_layers),
+
39 max_sqlen(max_sqlen),
+
40 embed_dim(embed_dim),
+
41 hidden_dim(hidden_dim),
+
42 vocsize(vocsize),
+
43 padding_idx(padding_idx),
+
44 rms_norm_eps(rms_norm_eps),
+
45 image_size(image_size),
+
46 patch_size(patch_size),
+
47 projection_dim(projection_dim),
+
48 mmproj_dim(mmproj_dim) {}
+
49};
-
29
-
30enum { OPT_125M, OPT_1_3B, OPT_6_7B, LLaMA_7B, LLaMA_13B, CodeLLaMA_7B, CodeLLaMA_13B, StarCoder_15_5B };
-
31enum { FP32, QINT8, INT4 };
-
32
-
33const struct model_config opt_6_7B(1, 32, 32, 2048, 4096, 16384, 50272, 1, 0);
-
34const struct model_config opt_1_3B(1, 32, 24, 2048, 2048, 8192, 50272, 1, 0);
-
35const struct model_config opt_125m(1, 12, 12, 2048, 768, 3072, 50272, 1, 0);
-
36const struct model_config llama_7B(1, 32, 32, 2048, 4096, 11008, 32000, 1, 1e-6);
-
37const struct model_config llama_13B(1, 40, 40, 2048, 5120, 13824, 32000, 1, 1e-6);
-
38const struct model_config codellama_7B(1, 32, 32, 2048, 4096, 11008, 32016, 1, 1e-5);
-
39const struct model_config codellama_13B(1, 40, 40, 2048, 5120, 13824, 32016, 1, 1e-5);
-
40// const struct model_config starcoder_15_5B(1, 32, 32, 2048, 4096, 11008, 32000, 1, 0); // temporary
-
41const struct model_config starcoder_15_5B(1, 48, 40, 2048, 6144, 24576, 49152, 1, 0);
-
42static struct model_config get_opt_model_config(int choise) {
-
43 struct model_config ret;
-
44 switch (choise) {
-
45 case OPT_125M:
-
46 ret = opt_125m;
-
47 break;
-
48 case OPT_1_3B:
-
49 ret = opt_1_3B;
-
50 break;
-
51 case OPT_6_7B:
-
52 ret = opt_6_7B;
-
53 break;
-
54 case LLaMA_7B:
-
55 ret = llama_7B;
-
56 break;
-
57 case LLaMA_13B:
-
58 ret = llama_13B;
-
59 break;
-
60 case CodeLLaMA_7B:
-
61 ret = codellama_7B;
-
62 break;
-
63 case CodeLLaMA_13B:
-
64 ret = codellama_13B;
-
65 break;
-
66 case StarCoder_15_5B:
-
67 ret = starcoder_15_5B;
-
68 break;
-
69 default:
-
70 throw("Unsupported model choice.");
-
71 break;
-
72 }
-
73 return ret;
-
74}
-
75
-
76#endif
+
50
+
51enum { OPT_125M, OPT_1_3B, OPT_6_7B, LLaMA_7B, LLaMA_13B, CodeLLaMA_7B, CodeLLaMA_13B, StarCoder_15_5B, LLaVA_7B, Clip_ViT_Large };
+
52enum { FP32, QINT8, INT4 };
+
53
+
54const struct model_config opt_6_7B(1, 32, 32, 2048, 4096, 16384, 50272, 1, 0);
+
55const struct model_config opt_1_3B(1, 32, 24, 2048, 2048, 8192, 50272, 1, 0);
+
56const struct model_config opt_125m(1, 12, 12, 2048, 768, 3072, 50272, 1, 0);
+
57const struct model_config llama_7B(1, 32, 32, 2048, 4096, 11008, 32000, 1, 1e-6);
+
58const struct model_config llama_13B(1, 40, 40, 2048, 5120, 13824, 32000, 1, 1e-6);
+
59const struct model_config codellama_7B(1, 32, 32, 2048, 4096, 11008, 32016, 1, 1e-5);
+
60const struct model_config codellama_13B(1, 40, 40, 2048, 5120, 13824, 32016, 1, 1e-5);
+
61const struct model_config starcoder_15_5B(1, 48, 40, 2048, 6144, 24576, 49152, 1, 0);
+
62const struct model_config llava_7B(1, 32, 32, 2048, 4096, 11008, 32000, 1, 1e-5);
+
63const struct model_config clip_vit_large(1, 16, 23, 2048, 1024, 4096, 0, 1, 0, 336, 14, 768, 4096); // llava's clip model uses only 23 layers out of 24
+
64
+
65static struct model_config get_opt_model_config(int choise) {
+
66 struct model_config ret;
+
67 switch (choise) {
+
68 case OPT_125M:
+
69 ret = opt_125m;
+
70 break;
+
71 case OPT_1_3B:
+
72 ret = opt_1_3B;
+
73 break;
+
74 case OPT_6_7B:
+
75 ret = opt_6_7B;
+
76 break;
+
77 case LLaMA_7B:
+
78 ret = llama_7B;
+
79 break;
+
80 case LLaMA_13B:
+
81 ret = llama_13B;
+
82 break;
+
83 case CodeLLaMA_7B:
+
84 ret = codellama_7B;
+
85 break;
+
86 case CodeLLaMA_13B:
+
87 ret = codellama_13B;
+
88 break;
+
89 case StarCoder_15_5B:
+
90 ret = starcoder_15_5B;
+
91 break;
+
92 case LLaVA_7B:
+
93 ret = llava_7B;
+
94 break;
+
95 case Clip_ViT_Large:
+
96 ret = clip_vit_large;
+
97 break;
+
98 default:
+
99 throw("Unsupported model choice.");
+
100 break;
+
101 }
+
102 return ret;
+
103}
+
104
+
105#endif
Definition model.h:5
diff --git a/operators_8h_source.html b/operators_8h_source.html index 87056f01..def75afd 100644 --- a/operators_8h_source.html +++ b/operators_8h_source.html @@ -108,27 +108,30 @@
23#include "ops/W8A8BFP32OFP32Linear.h"
24#include "ops/arg_max.h"
25#include "ops/linear.h"
-
26
-
27void softmax(const Matrix3D<float> &input, Matrix3D<float> &output, int dim);
-
28void batch_Add(const Matrix3D<float> &input, const Matrix3D<float> &input2, Matrix3D<float> &output);
-
29template <typename T>
-
30void linear(Matrix3D<T> &a, Matrix3D<T> &b, Matrix3D<T> &c);
-
31
-
32#ifdef QM_CUDA
-
33#include "ops/cuda/BMM_F16T.cuh"
-
34#include "ops/cuda/Embedding.cuh"
-
35#include "ops/cuda/LlamaRMSNorm.cuh"
-
36#include "ops/cuda/RotaryPosEmb.cuh"
-
37
-
38__global__ void batch_Add_float(const Matrix3D<float> input, const Matrix3D<float> input2, Matrix3D<float> output);
-
39__global__ void batch_Add_cuda(const Matrix3D<float16_t> input, const Matrix3D<float16_t> input2,
-
40 Matrix3D<float16_t> output);
-
41__global__ void batch_Add_cuda_half2(Matrix3D<float16_t> input, Matrix3D<float16_t> input2, Matrix3D<float16_t> output);
-
42__global__ void softmax_float(Matrix3D<float> input, Matrix3D<float> output);
-
43__global__ void softmax_cuda(Matrix3D<float16_t> input, Matrix3D<float16_t> output);
-
44#endif
-
45
-
46#endif // OPERATORS_H
+
26#include "ops/Conv2D.h"
+
27#include "ops/Gelu.h"
+
28
+
29void softmax(const Matrix3D<float> &input, Matrix3D<float> &output, int dim);
+
30void batch_Add(const Matrix3D<float> &input, const Matrix3D<float> &input2, Matrix3D<float> &output);
+
31template <typename T>
+
32void linear(Matrix3D<T> &a, Matrix3D<T> &b, Matrix3D<T> &c);
+
33
+
34
+
35#ifdef QM_CUDA
+
36#include "ops/cuda/BMM_F16T.cuh"
+
37#include "ops/cuda/Embedding.cuh"
+
38#include "ops/cuda/LlamaRMSNorm.cuh"
+
39#include "ops/cuda/RotaryPosEmb.cuh"
+
40
+
41__global__ void batch_Add_float(const Matrix3D<float> input, const Matrix3D<float> input2, Matrix3D<float> output);
+
42__global__ void batch_Add_cuda(const Matrix3D<float16_t> input, const Matrix3D<float16_t> input2,
+
43 Matrix3D<float16_t> output);
+
44__global__ void batch_Add_cuda_half2(Matrix3D<float16_t> input, Matrix3D<float16_t> input2, Matrix3D<float16_t> output);
+
45__global__ void softmax_float(Matrix3D<float> input, Matrix3D<float> output);
+
46__global__ void softmax_cuda(Matrix3D<float16_t> input, Matrix3D<float16_t> output);
+
47#endif
+
48
+
49#endif // OPERATORS_H
Definition common.h:34
diff --git a/search/all_14.js b/search/all_14.js index e5ab522d..a7cce7dd 100644 --- a/search/all_14.js +++ b/search/all_14.js @@ -5,7 +5,8 @@ var searchData= ['specific_20int4_20weight_20reordering_2',['Device-specific int4 Weight Reordering',['../index.html#autotoc_md15',1,'']]], ['speech_20to_20speech_20chatbot_20with_20tinychatengine_20a_20href_20https_3a_20youtu_20be_20bw5dm3awmna_20si_20ccvzdmq3hwoweqcc_20demo_20a_3',['Deploy speech-to-speech chatbot with TinyChatEngine <a href="https://youtu.be/Bw5Dm3aWMnA?si=CCvZDmq3HwowEQcC" >[Demo]</a>',['../index.html#autotoc_md12',1,'']]], ['starcoder_5fvocab_4',['starcoder_vocab',['../structstarcoder__vocab.html',1,'']]], - ['step_20by_20step_20to_20deploy_20llama2_207b_20chat_20with_20tinychatengine_5',['Step-by-step to Deploy LLaMA2-7B-chat with TinyChatEngine',['../index.html#autotoc_md11',1,'']]], - ['step_20to_20deploy_20llama2_207b_20chat_20with_20tinychatengine_6',['Step-by-step to Deploy LLaMA2-7B-chat with TinyChatEngine',['../index.html#autotoc_md11',1,'']]], - ['support_7',['Support',['../index.html#autotoc_md13',1,'Backend Support'],['../index.html#autotoc_md14',1,'Quantization and Model Support']]] + ['stbi_5fio_5fcallbacks_5',['stbi_io_callbacks',['../structstbi__io__callbacks.html',1,'']]], + ['step_20by_20step_20to_20deploy_20llama2_207b_20chat_20with_20tinychatengine_6',['Step-by-step to Deploy LLaMA2-7B-chat with TinyChatEngine',['../index.html#autotoc_md11',1,'']]], + ['step_20to_20deploy_20llama2_207b_20chat_20with_20tinychatengine_7',['Step-by-step to Deploy LLaMA2-7B-chat with TinyChatEngine',['../index.html#autotoc_md11',1,'']]], + ['support_8',['Support',['../index.html#autotoc_md13',1,'Backend Support'],['../index.html#autotoc_md14',1,'Quantization and Model Support']]] ]; diff --git a/search/all_6.js b/search/all_6.js index 842144a7..fdaaef0f 100644 --- a/search/all_6.js +++ b/search/all_6.js @@ -4,8 +4,13 @@ var searchData= 
['chat_20demo_20on_20an_20apple_20macbook_20pro_20m1_202021_20_3a_1',['LLaMA Chat Demo on an Apple MacBook Pro (M1, 2021):',['../index.html#autotoc_md2',1,'']]], ['chat_20with_20tinychatengine_2',['Step-by-step to Deploy LLaMA2-7B-chat with TinyChatEngine',['../index.html#autotoc_md11',1,'']]], ['chatbot_20with_20tinychatengine_20a_20href_20https_3a_20youtu_20be_20bw5dm3awmna_20si_20ccvzdmq3hwoweqcc_20demo_20a_3',['Deploy speech-to-speech chatbot with TinyChatEngine <a href="https://youtu.be/Bw5Dm3aWMnA?si=CCvZDmq3HwowEQcC" >[Demo]</a>',['../index.html#autotoc_md12',1,'']]], - ['code_20llama_20demo_20on_20an_20nvidia_20geforce_20rtx_204070_20laptop_3a_4',['Code LLaMA Demo on an NVIDIA GeForce RTX 4070 laptop:',['../index.html#autotoc_md1',1,'']]], - ['comparator_5',['comparator',['../structllama__sp__bigram_1_1comparator.html',1,'llama_sp_bigram']]], - ['compression_3a_20smoothquant_20and_20awq_6',['LLM Compression: SmoothQuant and AWQ',['../index.html#autotoc_md4',1,'']]], - ['cpu_7',['Windows with CPU',['../index.html#autotoc_md9',1,'']]] + ['clip_5fimage_5ff32_4',['clip_image_f32',['../structclip__image__f32.html',1,'']]], + ['clip_5fimage_5fu8_5',['clip_image_u8',['../structclip__image__u8.html',1,'']]], + ['clip_5fmodel_5fconfig_6',['clip_model_config',['../structclip__model__config.html',1,'']]], + ['code_20llama_20demo_20on_20an_20nvidia_20geforce_20rtx_204070_20laptop_3a_7',['Code LLaMA Demo on an NVIDIA GeForce RTX 4070 laptop:',['../index.html#autotoc_md1',1,'']]], + ['comparator_8',['comparator',['../structllama__sp__bigram_1_1comparator.html',1,'llama_sp_bigram']]], + ['compression_3a_20smoothquant_20and_20awq_9',['LLM Compression: SmoothQuant and AWQ',['../index.html#autotoc_md4',1,'']]], + ['conv2d_10',['Conv2D',['../classConv2D.html',1,'']]], + ['conv2d_5fparams_11',['Conv2D_params',['../structConv2D__params.html',1,'']]], + ['cpu_12',['Windows with CPU',['../index.html#autotoc_md9',1,'']]] ]; diff --git a/search/all_9.js b/search/all_9.js index 
b1f8949c..bec8fde9 100644 --- a/search/all_9.js +++ b/search/all_9.js @@ -1,40 +1,52 @@ var searchData= [ - ['fp32gptbigcodeattention_0',['Fp32GPTBigCodeAttention',['../classFp32GPTBigCodeAttention.html',1,'']]], - ['fp32gptbigcodeattention_5finput_1',['Fp32GPTBigCodeAttention_input',['../structFp32GPTBigCodeAttention__input.html',1,'']]], - ['fp32gptbigcodeattention_5foutput_2',['Fp32GPTBigCodeAttention_output',['../structFp32GPTBigCodeAttention__output.html',1,'']]], - ['fp32gptbigcodedecoder_3',['Fp32GPTBigCodeDecoder',['../classFp32GPTBigCodeDecoder.html',1,'']]], - ['fp32gptbigcodedecoder_5finput_4',['Fp32GPTBigCodeDecoder_input',['../structFp32GPTBigCodeDecoder__input.html',1,'']]], - ['fp32gptbigcodedecoder_5foutput_5',['Fp32GPTBigCodeDecoder_output',['../structFp32GPTBigCodeDecoder__output.html',1,'']]], - ['fp32gptbigcodedecoderlayer_6',['Fp32GPTBigCodeDecoderLayer',['../classFp32GPTBigCodeDecoderLayer.html',1,'']]], - ['fp32gptbigcodedecoderlayer_5finput_7',['Fp32GPTBigCodeDecoderLayer_input',['../structFp32GPTBigCodeDecoderLayer__input.html',1,'']]], - ['fp32gptbigcodedecoderlayer_5foutput_8',['Fp32GPTBigCodeDecoderLayer_output',['../structFp32GPTBigCodeDecoderLayer__output.html',1,'']]], - ['fp32gptbigcodeforcausallm_9',['Fp32GPTBigCodeForCausalLM',['../classFp32GPTBigCodeForCausalLM.html',1,'']]], - ['fp32gptbigcodeforcausallm_5finput_10',['Fp32GPTBigCodeForCausalLM_input',['../structFp32GPTBigCodeForCausalLM__input.html',1,'']]], - ['fp32gptbigcodeforcausallm_5foutput_11',['Fp32GPTBigCodeForCausalLM_output',['../structFp32GPTBigCodeForCausalLM__output.html',1,'']]], - ['fp32llamaattention_12',['Fp32llamaAttention',['../classFp32llamaAttention.html',1,'']]], - ['fp32llamaattention_5finput_13',['Fp32llamaAttention_input',['../structFp32llamaAttention__input.html',1,'']]], - ['fp32llamaattention_5foutput_14',['Fp32llamaAttention_output',['../structFp32llamaAttention__output.html',1,'']]], - 
['fp32llamadecoder_15',['Fp32llamaDecoder',['../classFp32llamaDecoder.html',1,'']]], - ['fp32llamadecoder_5finput_16',['Fp32llamaDecoder_input',['../structFp32llamaDecoder__input.html',1,'']]], - ['fp32llamadecoder_5foutput_17',['Fp32llamaDecoder_output',['../structFp32llamaDecoder__output.html',1,'']]], - ['fp32llamadecoderlayer_18',['Fp32llamaDecoderLayer',['../classFp32llamaDecoderLayer.html',1,'']]], - ['fp32llamadecoderlayer_5finput_19',['Fp32llamaDecoderLayer_input',['../structFp32llamaDecoderLayer__input.html',1,'']]], - ['fp32llamadecoderlayer_5foutput_20',['Fp32llamaDecoderLayer_output',['../structFp32llamaDecoderLayer__output.html',1,'']]], - ['fp32llamaforcausallm_21',['Fp32LlamaForCausalLM',['../classFp32LlamaForCausalLM.html',1,'']]], - ['fp32llamaforcausallm_5finput_22',['Fp32LlamaForCausalLM_input',['../structFp32LlamaForCausalLM__input.html',1,'']]], - ['fp32llamaforcausallm_5foutput_23',['Fp32LlamaForCausalLM_output',['../structFp32LlamaForCausalLM__output.html',1,'']]], - ['fp32optattention_24',['Fp32OPTAttention',['../classFp32OPTAttention.html',1,'']]], - ['fp32optattention_5finput_25',['Fp32OPTAttention_input',['../structFp32OPTAttention__input.html',1,'']]], - ['fp32optattention_5foutput_26',['Fp32OPTAttention_output',['../structFp32OPTAttention__output.html',1,'']]], - ['fp32optdecoder_27',['Fp32OPTDecoder',['../classFp32OPTDecoder.html',1,'']]], - ['fp32optdecoder_5finput_28',['Fp32OPTDecoder_input',['../structFp32OPTDecoder__input.html',1,'']]], - ['fp32optdecoder_5foutput_29',['Fp32OPTDecoder_output',['../structFp32OPTDecoder__output.html',1,'']]], - ['fp32optdecoderlayer_30',['Fp32OPTDecoderLayer',['../classFp32OPTDecoderLayer.html',1,'']]], - ['fp32optdecoderlayer_5finput_31',['Fp32OPTDecoderLayer_input',['../structFp32OPTDecoderLayer__input.html',1,'']]], - ['fp32optdecoderlayer_5foutput_32',['Fp32OPTDecoderLayer_output',['../structFp32OPTDecoderLayer__output.html',1,'']]], - 
['fp32optforcausallm_33',['Fp32OPTForCausalLM',['../classFp32OPTForCausalLM.html',1,'']]], - ['fp32optforcausallm_5finput_34',['Fp32OPTForCausalLM_input',['../structFp32OPTForCausalLM__input.html',1,'']]], - ['fp32optforcausallm_5foutput_35',['Fp32OPTForCausalLM_output',['../structFp32OPTForCausalLM__output.html',1,'']]], - ['from_20our_20model_20zoo_36',['Download and Deploy Models from our Model Zoo',['../index.html#autotoc_md16',1,'']]] + ['fp32clipattention_0',['Fp32CLIPAttention',['../classFp32CLIPAttention.html',1,'']]], + ['fp32clipattention_5finput_1',['Fp32CLIPAttention_input',['../structFp32CLIPAttention__input.html',1,'']]], + ['fp32clipattention_5foutput_2',['Fp32CLIPAttention_output',['../structFp32CLIPAttention__output.html',1,'']]], + ['fp32clipencoder_3',['Fp32CLIPEncoder',['../classFp32CLIPEncoder.html',1,'']]], + ['fp32clipencoder_5finput_4',['Fp32CLIPEncoder_input',['../structFp32CLIPEncoder__input.html',1,'']]], + ['fp32clipencoder_5foutput_5',['Fp32CLIPEncoder_output',['../structFp32CLIPEncoder__output.html',1,'']]], + ['fp32clipencoderlayer_6',['Fp32CLIPEncoderLayer',['../classFp32CLIPEncoderLayer.html',1,'']]], + ['fp32clipencoderlayer_5finput_7',['Fp32CLIPEncoderLayer_input',['../structFp32CLIPEncoderLayer__input.html',1,'']]], + ['fp32clipencoderlayer_5foutput_8',['Fp32CLIPEncoderLayer_output',['../structFp32CLIPEncoderLayer__output.html',1,'']]], + ['fp32clipvisiontransformer_9',['Fp32CLIPVisionTransformer',['../classFp32CLIPVisionTransformer.html',1,'']]], + ['fp32clipvisiontransformer_5finput_10',['Fp32CLIPVisionTransformer_input',['../structFp32CLIPVisionTransformer__input.html',1,'']]], + ['fp32clipvisiontransformer_5foutput_11',['Fp32CLIPVisionTransformer_output',['../structFp32CLIPVisionTransformer__output.html',1,'']]], + ['fp32gptbigcodeattention_12',['Fp32GPTBigCodeAttention',['../classFp32GPTBigCodeAttention.html',1,'']]], + 
['fp32gptbigcodeattention_5finput_13',['Fp32GPTBigCodeAttention_input',['../structFp32GPTBigCodeAttention__input.html',1,'']]], + ['fp32gptbigcodeattention_5foutput_14',['Fp32GPTBigCodeAttention_output',['../structFp32GPTBigCodeAttention__output.html',1,'']]], + ['fp32gptbigcodedecoder_15',['Fp32GPTBigCodeDecoder',['../classFp32GPTBigCodeDecoder.html',1,'']]], + ['fp32gptbigcodedecoder_5finput_16',['Fp32GPTBigCodeDecoder_input',['../structFp32GPTBigCodeDecoder__input.html',1,'']]], + ['fp32gptbigcodedecoder_5foutput_17',['Fp32GPTBigCodeDecoder_output',['../structFp32GPTBigCodeDecoder__output.html',1,'']]], + ['fp32gptbigcodedecoderlayer_18',['Fp32GPTBigCodeDecoderLayer',['../classFp32GPTBigCodeDecoderLayer.html',1,'']]], + ['fp32gptbigcodedecoderlayer_5finput_19',['Fp32GPTBigCodeDecoderLayer_input',['../structFp32GPTBigCodeDecoderLayer__input.html',1,'']]], + ['fp32gptbigcodedecoderlayer_5foutput_20',['Fp32GPTBigCodeDecoderLayer_output',['../structFp32GPTBigCodeDecoderLayer__output.html',1,'']]], + ['fp32gptbigcodeforcausallm_21',['Fp32GPTBigCodeForCausalLM',['../classFp32GPTBigCodeForCausalLM.html',1,'']]], + ['fp32gptbigcodeforcausallm_5finput_22',['Fp32GPTBigCodeForCausalLM_input',['../structFp32GPTBigCodeForCausalLM__input.html',1,'']]], + ['fp32gptbigcodeforcausallm_5foutput_23',['Fp32GPTBigCodeForCausalLM_output',['../structFp32GPTBigCodeForCausalLM__output.html',1,'']]], + ['fp32llamaattention_24',['Fp32llamaAttention',['../classFp32llamaAttention.html',1,'']]], + ['fp32llamaattention_5finput_25',['Fp32llamaAttention_input',['../structFp32llamaAttention__input.html',1,'']]], + ['fp32llamaattention_5foutput_26',['Fp32llamaAttention_output',['../structFp32llamaAttention__output.html',1,'']]], + ['fp32llamadecoder_27',['Fp32llamaDecoder',['../classFp32llamaDecoder.html',1,'']]], + ['fp32llamadecoder_5finput_28',['Fp32llamaDecoder_input',['../structFp32llamaDecoder__input.html',1,'']]], + 
['fp32llamadecoder_5foutput_29',['Fp32llamaDecoder_output',['../structFp32llamaDecoder__output.html',1,'']]], + ['fp32llamadecoderlayer_30',['Fp32llamaDecoderLayer',['../classFp32llamaDecoderLayer.html',1,'']]], + ['fp32llamadecoderlayer_5finput_31',['Fp32llamaDecoderLayer_input',['../structFp32llamaDecoderLayer__input.html',1,'']]], + ['fp32llamadecoderlayer_5foutput_32',['Fp32llamaDecoderLayer_output',['../structFp32llamaDecoderLayer__output.html',1,'']]], + ['fp32llamaforcausallm_33',['Fp32LlamaForCausalLM',['../classFp32LlamaForCausalLM.html',1,'']]], + ['fp32llamaforcausallm_5finput_34',['Fp32LlamaForCausalLM_input',['../structFp32LlamaForCausalLM__input.html',1,'']]], + ['fp32llamaforcausallm_5foutput_35',['Fp32LlamaForCausalLM_output',['../structFp32LlamaForCausalLM__output.html',1,'']]], + ['fp32optattention_36',['Fp32OPTAttention',['../classFp32OPTAttention.html',1,'']]], + ['fp32optattention_5finput_37',['Fp32OPTAttention_input',['../structFp32OPTAttention__input.html',1,'']]], + ['fp32optattention_5foutput_38',['Fp32OPTAttention_output',['../structFp32OPTAttention__output.html',1,'']]], + ['fp32optdecoder_39',['Fp32OPTDecoder',['../classFp32OPTDecoder.html',1,'']]], + ['fp32optdecoder_5finput_40',['Fp32OPTDecoder_input',['../structFp32OPTDecoder__input.html',1,'']]], + ['fp32optdecoder_5foutput_41',['Fp32OPTDecoder_output',['../structFp32OPTDecoder__output.html',1,'']]], + ['fp32optdecoderlayer_42',['Fp32OPTDecoderLayer',['../classFp32OPTDecoderLayer.html',1,'']]], + ['fp32optdecoderlayer_5finput_43',['Fp32OPTDecoderLayer_input',['../structFp32OPTDecoderLayer__input.html',1,'']]], + ['fp32optdecoderlayer_5foutput_44',['Fp32OPTDecoderLayer_output',['../structFp32OPTDecoderLayer__output.html',1,'']]], + ['fp32optforcausallm_45',['Fp32OPTForCausalLM',['../classFp32OPTForCausalLM.html',1,'']]], + ['fp32optforcausallm_5finput_46',['Fp32OPTForCausalLM_input',['../structFp32OPTForCausalLM__input.html',1,'']]], + 
['fp32optforcausallm_5foutput_47',['Fp32OPTForCausalLM_output',['../structFp32OPTForCausalLM__output.html',1,'']]], + ['from_20our_20model_20zoo_48',['Download and Deploy Models from our Model Zoo',['../index.html#autotoc_md16',1,'']]] ]; diff --git a/search/all_d.js b/search/all_d.js index f0ce6dc6..1a7f110b 100644 --- a/search/all_d.js +++ b/search/all_d.js @@ -17,7 +17,8 @@ var searchData= ['llama_5ftokenizer_14',['llama_tokenizer',['../structllama__tokenizer.html',1,'']]], ['llama_5fvocab_15',['llama_vocab',['../structllama__vocab.html',1,'']]], ['llamarmsnorm_16',['LlamaRMSNorm',['../classLlamaRMSNorm.html',1,'']]], - ['llm_20compression_3a_20smoothquant_20and_20awq_17',['LLM Compression: SmoothQuant and AWQ',['../index.html#autotoc_md4',1,'']]], - ['llm_20inference_20engine_3a_20tinychatengine_18',['LLM Inference Engine: TinyChatEngine',['../index.html#autotoc_md5',1,'']]], - ['llm_20inference_20library_19',['TinyChatEngine: On-Device LLM Inference Library',['../index.html#autotoc_md0',1,'']]] + ['llava_5fimage_5fembed_17',['llava_image_embed',['../structllava__image__embed.html',1,'']]], + ['llm_20compression_3a_20smoothquant_20and_20awq_18',['LLM Compression: SmoothQuant and AWQ',['../index.html#autotoc_md4',1,'']]], + ['llm_20inference_20engine_3a_20tinychatengine_19',['LLM Inference Engine: TinyChatEngine',['../index.html#autotoc_md5',1,'']]], + ['llm_20inference_20library_20',['TinyChatEngine: On-Device LLM Inference Library',['../index.html#autotoc_md0',1,'']]] ]; diff --git a/search/all_e.js b/search/all_e.js index 22914c06..d0a39595 100644 --- a/search/all_e.js +++ b/search/all_e.js @@ -11,13 +11,15 @@ var searchData= ['matrix3d_3c_20int_20_3e_8',['Matrix3D< int >',['../classMatrix3D.html',1,'']]], ['matrix3d_3c_20int8_5ft_20_3e_9',['Matrix3D< int8_t >',['../classMatrix3D.html',1,'']]], ['matrix3d_3c_20uint8_5ft_20_3e_10',['Matrix3D< uint8_t >',['../classMatrix3D.html',1,'']]], - 
['max_5ferror_5finfo_11',['max_error_info',['../structmax__error__info.html',1,'']]], - ['metalmatmulbuffers_12',['MetalMatmulBuffers',['../structMetalMatmulBuffers.html',1,'']]], - ['metalmatmulint4_13',['MetalMatmulInt4',['../classMetalMatmulInt4.html',1,'']]], - ['metalmatmulint4imp_14',['MetalMatmulInt4IMP',['../classMetalMatmulInt4IMP.html',1,'']]], - ['metalmatmulparams_15',['MetalMatMulParams',['../structMetalMatMulParams.html',1,'']]], - ['model_20support_16',['Quantization and Model Support',['../index.html#autotoc_md14',1,'']]], - ['model_20zoo_17',['Download and Deploy Models from our Model Zoo',['../index.html#autotoc_md16',1,'']]], - ['model_5fconfig_18',['model_config',['../structmodel__config.html',1,'']]], - ['models_20from_20our_20model_20zoo_19',['Download and Deploy Models from our Model Zoo',['../index.html#autotoc_md16',1,'']]] + ['matrix4d_11',['Matrix4D',['../classMatrix4D.html',1,'']]], + ['matrix4d_3c_20float_20_3e_12',['Matrix4D< float >',['../classMatrix4D.html',1,'']]], + ['max_5ferror_5finfo_13',['max_error_info',['../structmax__error__info.html',1,'']]], + ['metalmatmulbuffers_14',['MetalMatmulBuffers',['../structMetalMatmulBuffers.html',1,'']]], + ['metalmatmulint4_15',['MetalMatmulInt4',['../classMetalMatmulInt4.html',1,'']]], + ['metalmatmulint4imp_16',['MetalMatmulInt4IMP',['../classMetalMatmulInt4IMP.html',1,'']]], + ['metalmatmulparams_17',['MetalMatMulParams',['../structMetalMatMulParams.html',1,'']]], + ['model_20support_18',['Quantization and Model Support',['../index.html#autotoc_md14',1,'']]], + ['model_20zoo_19',['Download and Deploy Models from our Model Zoo',['../index.html#autotoc_md16',1,'']]], + ['model_5fconfig_20',['model_config',['../structmodel__config.html',1,'']]], + ['models_20from_20our_20model_20zoo_21',['Download and Deploy Models from our Model Zoo',['../index.html#autotoc_md16',1,'']]] ]; diff --git a/search/classes_2.js b/search/classes_2.js index 2e208fef..a50519ef 100644 --- a/search/classes_2.js +++ 
b/search/classes_2.js @@ -1,4 +1,9 @@ var searchData= [ - ['comparator_0',['comparator',['../structllama__sp__bigram_1_1comparator.html',1,'llama_sp_bigram']]] + ['clip_5fimage_5ff32_0',['clip_image_f32',['../structclip__image__f32.html',1,'']]], + ['clip_5fimage_5fu8_1',['clip_image_u8',['../structclip__image__u8.html',1,'']]], + ['clip_5fmodel_5fconfig_2',['clip_model_config',['../structclip__model__config.html',1,'']]], + ['comparator_3',['comparator',['../structllama__sp__bigram_1_1comparator.html',1,'llama_sp_bigram']]], + ['conv2d_4',['Conv2D',['../classConv2D.html',1,'']]], + ['conv2d_5fparams_5',['Conv2D_params',['../structConv2D__params.html',1,'']]] ]; diff --git a/search/classes_4.js b/search/classes_4.js index c4baacdb..0cbdc4f5 100644 --- a/search/classes_4.js +++ b/search/classes_4.js @@ -1,39 +1,51 @@ var searchData= [ - ['fp32gptbigcodeattention_0',['Fp32GPTBigCodeAttention',['../classFp32GPTBigCodeAttention.html',1,'']]], - ['fp32gptbigcodeattention_5finput_1',['Fp32GPTBigCodeAttention_input',['../structFp32GPTBigCodeAttention__input.html',1,'']]], - ['fp32gptbigcodeattention_5foutput_2',['Fp32GPTBigCodeAttention_output',['../structFp32GPTBigCodeAttention__output.html',1,'']]], - ['fp32gptbigcodedecoder_3',['Fp32GPTBigCodeDecoder',['../classFp32GPTBigCodeDecoder.html',1,'']]], - ['fp32gptbigcodedecoder_5finput_4',['Fp32GPTBigCodeDecoder_input',['../structFp32GPTBigCodeDecoder__input.html',1,'']]], - ['fp32gptbigcodedecoder_5foutput_5',['Fp32GPTBigCodeDecoder_output',['../structFp32GPTBigCodeDecoder__output.html',1,'']]], - ['fp32gptbigcodedecoderlayer_6',['Fp32GPTBigCodeDecoderLayer',['../classFp32GPTBigCodeDecoderLayer.html',1,'']]], - ['fp32gptbigcodedecoderlayer_5finput_7',['Fp32GPTBigCodeDecoderLayer_input',['../structFp32GPTBigCodeDecoderLayer__input.html',1,'']]], - ['fp32gptbigcodedecoderlayer_5foutput_8',['Fp32GPTBigCodeDecoderLayer_output',['../structFp32GPTBigCodeDecoderLayer__output.html',1,'']]], - 
['fp32gptbigcodeforcausallm_9',['Fp32GPTBigCodeForCausalLM',['../classFp32GPTBigCodeForCausalLM.html',1,'']]], - ['fp32gptbigcodeforcausallm_5finput_10',['Fp32GPTBigCodeForCausalLM_input',['../structFp32GPTBigCodeForCausalLM__input.html',1,'']]], - ['fp32gptbigcodeforcausallm_5foutput_11',['Fp32GPTBigCodeForCausalLM_output',['../structFp32GPTBigCodeForCausalLM__output.html',1,'']]], - ['fp32llamaattention_12',['Fp32llamaAttention',['../classFp32llamaAttention.html',1,'']]], - ['fp32llamaattention_5finput_13',['Fp32llamaAttention_input',['../structFp32llamaAttention__input.html',1,'']]], - ['fp32llamaattention_5foutput_14',['Fp32llamaAttention_output',['../structFp32llamaAttention__output.html',1,'']]], - ['fp32llamadecoder_15',['Fp32llamaDecoder',['../classFp32llamaDecoder.html',1,'']]], - ['fp32llamadecoder_5finput_16',['Fp32llamaDecoder_input',['../structFp32llamaDecoder__input.html',1,'']]], - ['fp32llamadecoder_5foutput_17',['Fp32llamaDecoder_output',['../structFp32llamaDecoder__output.html',1,'']]], - ['fp32llamadecoderlayer_18',['Fp32llamaDecoderLayer',['../classFp32llamaDecoderLayer.html',1,'']]], - ['fp32llamadecoderlayer_5finput_19',['Fp32llamaDecoderLayer_input',['../structFp32llamaDecoderLayer__input.html',1,'']]], - ['fp32llamadecoderlayer_5foutput_20',['Fp32llamaDecoderLayer_output',['../structFp32llamaDecoderLayer__output.html',1,'']]], - ['fp32llamaforcausallm_21',['Fp32LlamaForCausalLM',['../classFp32LlamaForCausalLM.html',1,'']]], - ['fp32llamaforcausallm_5finput_22',['Fp32LlamaForCausalLM_input',['../structFp32LlamaForCausalLM__input.html',1,'']]], - ['fp32llamaforcausallm_5foutput_23',['Fp32LlamaForCausalLM_output',['../structFp32LlamaForCausalLM__output.html',1,'']]], - ['fp32optattention_24',['Fp32OPTAttention',['../classFp32OPTAttention.html',1,'']]], - ['fp32optattention_5finput_25',['Fp32OPTAttention_input',['../structFp32OPTAttention__input.html',1,'']]], - 
['fp32optattention_5foutput_26',['Fp32OPTAttention_output',['../structFp32OPTAttention__output.html',1,'']]], - ['fp32optdecoder_27',['Fp32OPTDecoder',['../classFp32OPTDecoder.html',1,'']]], - ['fp32optdecoder_5finput_28',['Fp32OPTDecoder_input',['../structFp32OPTDecoder__input.html',1,'']]], - ['fp32optdecoder_5foutput_29',['Fp32OPTDecoder_output',['../structFp32OPTDecoder__output.html',1,'']]], - ['fp32optdecoderlayer_30',['Fp32OPTDecoderLayer',['../classFp32OPTDecoderLayer.html',1,'']]], - ['fp32optdecoderlayer_5finput_31',['Fp32OPTDecoderLayer_input',['../structFp32OPTDecoderLayer__input.html',1,'']]], - ['fp32optdecoderlayer_5foutput_32',['Fp32OPTDecoderLayer_output',['../structFp32OPTDecoderLayer__output.html',1,'']]], - ['fp32optforcausallm_33',['Fp32OPTForCausalLM',['../classFp32OPTForCausalLM.html',1,'']]], - ['fp32optforcausallm_5finput_34',['Fp32OPTForCausalLM_input',['../structFp32OPTForCausalLM__input.html',1,'']]], - ['fp32optforcausallm_5foutput_35',['Fp32OPTForCausalLM_output',['../structFp32OPTForCausalLM__output.html',1,'']]] + ['fp32clipattention_0',['Fp32CLIPAttention',['../classFp32CLIPAttention.html',1,'']]], + ['fp32clipattention_5finput_1',['Fp32CLIPAttention_input',['../structFp32CLIPAttention__input.html',1,'']]], + ['fp32clipattention_5foutput_2',['Fp32CLIPAttention_output',['../structFp32CLIPAttention__output.html',1,'']]], + ['fp32clipencoder_3',['Fp32CLIPEncoder',['../classFp32CLIPEncoder.html',1,'']]], + ['fp32clipencoder_5finput_4',['Fp32CLIPEncoder_input',['../structFp32CLIPEncoder__input.html',1,'']]], + ['fp32clipencoder_5foutput_5',['Fp32CLIPEncoder_output',['../structFp32CLIPEncoder__output.html',1,'']]], + ['fp32clipencoderlayer_6',['Fp32CLIPEncoderLayer',['../classFp32CLIPEncoderLayer.html',1,'']]], + ['fp32clipencoderlayer_5finput_7',['Fp32CLIPEncoderLayer_input',['../structFp32CLIPEncoderLayer__input.html',1,'']]], + 
['fp32clipencoderlayer_5foutput_8',['Fp32CLIPEncoderLayer_output',['../structFp32CLIPEncoderLayer__output.html',1,'']]], + ['fp32clipvisiontransformer_9',['Fp32CLIPVisionTransformer',['../classFp32CLIPVisionTransformer.html',1,'']]], + ['fp32clipvisiontransformer_5finput_10',['Fp32CLIPVisionTransformer_input',['../structFp32CLIPVisionTransformer__input.html',1,'']]], + ['fp32clipvisiontransformer_5foutput_11',['Fp32CLIPVisionTransformer_output',['../structFp32CLIPVisionTransformer__output.html',1,'']]], + ['fp32gptbigcodeattention_12',['Fp32GPTBigCodeAttention',['../classFp32GPTBigCodeAttention.html',1,'']]], + ['fp32gptbigcodeattention_5finput_13',['Fp32GPTBigCodeAttention_input',['../structFp32GPTBigCodeAttention__input.html',1,'']]], + ['fp32gptbigcodeattention_5foutput_14',['Fp32GPTBigCodeAttention_output',['../structFp32GPTBigCodeAttention__output.html',1,'']]], + ['fp32gptbigcodedecoder_15',['Fp32GPTBigCodeDecoder',['../classFp32GPTBigCodeDecoder.html',1,'']]], + ['fp32gptbigcodedecoder_5finput_16',['Fp32GPTBigCodeDecoder_input',['../structFp32GPTBigCodeDecoder__input.html',1,'']]], + ['fp32gptbigcodedecoder_5foutput_17',['Fp32GPTBigCodeDecoder_output',['../structFp32GPTBigCodeDecoder__output.html',1,'']]], + ['fp32gptbigcodedecoderlayer_18',['Fp32GPTBigCodeDecoderLayer',['../classFp32GPTBigCodeDecoderLayer.html',1,'']]], + ['fp32gptbigcodedecoderlayer_5finput_19',['Fp32GPTBigCodeDecoderLayer_input',['../structFp32GPTBigCodeDecoderLayer__input.html',1,'']]], + ['fp32gptbigcodedecoderlayer_5foutput_20',['Fp32GPTBigCodeDecoderLayer_output',['../structFp32GPTBigCodeDecoderLayer__output.html',1,'']]], + ['fp32gptbigcodeforcausallm_21',['Fp32GPTBigCodeForCausalLM',['../classFp32GPTBigCodeForCausalLM.html',1,'']]], + ['fp32gptbigcodeforcausallm_5finput_22',['Fp32GPTBigCodeForCausalLM_input',['../structFp32GPTBigCodeForCausalLM__input.html',1,'']]], + 
['fp32gptbigcodeforcausallm_5foutput_23',['Fp32GPTBigCodeForCausalLM_output',['../structFp32GPTBigCodeForCausalLM__output.html',1,'']]], + ['fp32llamaattention_24',['Fp32llamaAttention',['../classFp32llamaAttention.html',1,'']]], + ['fp32llamaattention_5finput_25',['Fp32llamaAttention_input',['../structFp32llamaAttention__input.html',1,'']]], + ['fp32llamaattention_5foutput_26',['Fp32llamaAttention_output',['../structFp32llamaAttention__output.html',1,'']]], + ['fp32llamadecoder_27',['Fp32llamaDecoder',['../classFp32llamaDecoder.html',1,'']]], + ['fp32llamadecoder_5finput_28',['Fp32llamaDecoder_input',['../structFp32llamaDecoder__input.html',1,'']]], + ['fp32llamadecoder_5foutput_29',['Fp32llamaDecoder_output',['../structFp32llamaDecoder__output.html',1,'']]], + ['fp32llamadecoderlayer_30',['Fp32llamaDecoderLayer',['../classFp32llamaDecoderLayer.html',1,'']]], + ['fp32llamadecoderlayer_5finput_31',['Fp32llamaDecoderLayer_input',['../structFp32llamaDecoderLayer__input.html',1,'']]], + ['fp32llamadecoderlayer_5foutput_32',['Fp32llamaDecoderLayer_output',['../structFp32llamaDecoderLayer__output.html',1,'']]], + ['fp32llamaforcausallm_33',['Fp32LlamaForCausalLM',['../classFp32LlamaForCausalLM.html',1,'']]], + ['fp32llamaforcausallm_5finput_34',['Fp32LlamaForCausalLM_input',['../structFp32LlamaForCausalLM__input.html',1,'']]], + ['fp32llamaforcausallm_5foutput_35',['Fp32LlamaForCausalLM_output',['../structFp32LlamaForCausalLM__output.html',1,'']]], + ['fp32optattention_36',['Fp32OPTAttention',['../classFp32OPTAttention.html',1,'']]], + ['fp32optattention_5finput_37',['Fp32OPTAttention_input',['../structFp32OPTAttention__input.html',1,'']]], + ['fp32optattention_5foutput_38',['Fp32OPTAttention_output',['../structFp32OPTAttention__output.html',1,'']]], + ['fp32optdecoder_39',['Fp32OPTDecoder',['../classFp32OPTDecoder.html',1,'']]], + ['fp32optdecoder_5finput_40',['Fp32OPTDecoder_input',['../structFp32OPTDecoder__input.html',1,'']]], + 
['fp32optdecoder_5foutput_41',['Fp32OPTDecoder_output',['../structFp32OPTDecoder__output.html',1,'']]], + ['fp32optdecoderlayer_42',['Fp32OPTDecoderLayer',['../classFp32OPTDecoderLayer.html',1,'']]], + ['fp32optdecoderlayer_5finput_43',['Fp32OPTDecoderLayer_input',['../structFp32OPTDecoderLayer__input.html',1,'']]], + ['fp32optdecoderlayer_5foutput_44',['Fp32OPTDecoderLayer_output',['../structFp32OPTDecoderLayer__output.html',1,'']]], + ['fp32optforcausallm_45',['Fp32OPTForCausalLM',['../classFp32OPTForCausalLM.html',1,'']]], + ['fp32optforcausallm_5finput_46',['Fp32OPTForCausalLM_input',['../structFp32OPTForCausalLM__input.html',1,'']]], + ['fp32optforcausallm_5foutput_47',['Fp32OPTForCausalLM_output',['../structFp32OPTForCausalLM__output.html',1,'']]] ]; diff --git a/search/classes_6.js b/search/classes_6.js index 59d0465c..03884bba 100644 --- a/search/classes_6.js +++ b/search/classes_6.js @@ -11,5 +11,6 @@ var searchData= ['llama_5fsp_5fsymbol_8',['llama_sp_symbol',['../structllama__sp__symbol.html',1,'']]], ['llama_5ftokenizer_9',['llama_tokenizer',['../structllama__tokenizer.html',1,'']]], ['llama_5fvocab_10',['llama_vocab',['../structllama__vocab.html',1,'']]], - ['llamarmsnorm_11',['LlamaRMSNorm',['../classLlamaRMSNorm.html',1,'']]] + ['llamarmsnorm_11',['LlamaRMSNorm',['../classLlamaRMSNorm.html',1,'']]], + ['llava_5fimage_5fembed_12',['llava_image_embed',['../structllava__image__embed.html',1,'']]] ]; diff --git a/search/classes_7.js b/search/classes_7.js index 8edf744e..3e6fefb9 100644 --- a/search/classes_7.js +++ b/search/classes_7.js @@ -8,10 +8,12 @@ var searchData= ['matrix3d_3c_20int_20_3e_5',['Matrix3D< int >',['../classMatrix3D.html',1,'']]], ['matrix3d_3c_20int8_5ft_20_3e_6',['Matrix3D< int8_t >',['../classMatrix3D.html',1,'']]], ['matrix3d_3c_20uint8_5ft_20_3e_7',['Matrix3D< uint8_t >',['../classMatrix3D.html',1,'']]], - ['max_5ferror_5finfo_8',['max_error_info',['../structmax__error__info.html',1,'']]], - 
['metalmatmulbuffers_9',['MetalMatmulBuffers',['../structMetalMatmulBuffers.html',1,'']]], - ['metalmatmulint4_10',['MetalMatmulInt4',['../classMetalMatmulInt4.html',1,'']]], - ['metalmatmulint4imp_11',['MetalMatmulInt4IMP',['../classMetalMatmulInt4IMP.html',1,'']]], - ['metalmatmulparams_12',['MetalMatMulParams',['../structMetalMatMulParams.html',1,'']]], - ['model_5fconfig_13',['model_config',['../structmodel__config.html',1,'']]] + ['matrix4d_8',['Matrix4D',['../classMatrix4D.html',1,'']]], + ['matrix4d_3c_20float_20_3e_9',['Matrix4D< float >',['../classMatrix4D.html',1,'']]], + ['max_5ferror_5finfo_10',['max_error_info',['../structmax__error__info.html',1,'']]], + ['metalmatmulbuffers_11',['MetalMatmulBuffers',['../structMetalMatmulBuffers.html',1,'']]], + ['metalmatmulint4_12',['MetalMatmulInt4',['../classMetalMatmulInt4.html',1,'']]], + ['metalmatmulint4imp_13',['MetalMatmulInt4IMP',['../classMetalMatmulInt4IMP.html',1,'']]], + ['metalmatmulparams_14',['MetalMatMulParams',['../structMetalMatMulParams.html',1,'']]], + ['model_5fconfig_15',['model_config',['../structmodel__config.html',1,'']]] ]; diff --git a/search/classes_c.js b/search/classes_c.js index 9f6d3b7a..9588aab0 100644 --- a/search/classes_c.js +++ b/search/classes_c.js @@ -1,4 +1,5 @@ var searchData= [ - ['starcoder_5fvocab_0',['starcoder_vocab',['../structstarcoder__vocab.html',1,'']]] + ['starcoder_5fvocab_0',['starcoder_vocab',['../structstarcoder__vocab.html',1,'']]], + ['stbi_5fio_5fcallbacks_1',['stbi_io_callbacks',['../structstbi__io__callbacks.html',1,'']]] ]; diff --git a/stb__image_8h_source.html b/stb__image_8h_source.html new file mode 100644 index 00000000..219c3882 --- /dev/null +++ b/stb__image_8h_source.html @@ -0,0 +1,8483 @@ + + + + + + + +TinyChatEngine: llm/include/stb_image.h Source File + + + + + + + + + + + +
+
+ + + + + + +
+
TinyChatEngine +
+
+
+ + + + + + + + + +
+
+ + +
+
+
+
+
+
Loading...
+
Searching...
+
No Matches
+
+
+
+
+ + +
+
+
stb_image.h
+
+
+
1/* stb_image - v2.28 - public domain image loader - http://nothings.org/stb
+
2 no warranty implied; use at your own risk
+
3
+
4 Do this:
+
5 #define STB_IMAGE_IMPLEMENTATION
+
6 before you include this file in *one* C or C++ file to create the implementation.
+
7
+
8 // i.e. it should look like this:
+
9 #include ...
+
10 #include ...
+
11 #include ...
+
12 #define STB_IMAGE_IMPLEMENTATION
+
13 #include "stb_image.h"
+
14
+
15 You can #define STBI_ASSERT(x) before the #include to avoid using assert.h.
+
16 And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free
+
17
+
18
+
19 QUICK NOTES:
+
20 Primarily of interest to game developers and other people who can
+
21 avoid problematic images and only need the trivial interface
+
22
+
23 JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib)
+
24 PNG 1/2/4/8/16-bit-per-channel
+
25
+
26 TGA (not sure what subset, if a subset)
+
27 BMP non-1bpp, non-RLE
+
28 PSD (composited view only, no extra channels, 8/16 bit-per-channel)
+
29
+
30 GIF (*comp always reports as 4-channel)
+
31 HDR (radiance rgbE format)
+
32 PIC (Softimage PIC)
+
33 PNM (PPM and PGM binary only)
+
34
+
35 Animated GIF still needs a proper API, but here's one way to do it:
+
36 http://gist.github.com/urraka/685d9a6340b26b830d49
+
37
+
38 - decode from memory or through FILE (define STBI_NO_STDIO to remove code)
+
39 - decode from arbitrary I/O callbacks
+
40 - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON)
+
41
+
42 Full documentation under "DOCUMENTATION" below.
+
43
+
44
+
45LICENSE
+
46
+
47 See end of file for license information.
+
48
+
49RECENT REVISION HISTORY:
+
50
+
51 2.28 (2023-01-29) many error fixes, security errors, just tons of stuff
+
52 2.27 (2021-07-11) document stbi_info better, 16-bit PNM support, bug fixes
+
53 2.26 (2020-07-13) many minor fixes
+
54 2.25 (2020-02-02) fix warnings
+
55 2.24 (2020-02-02) fix warnings; thread-local failure_reason and flip_vertically
+
56 2.23 (2019-08-11) fix clang static analysis warning
+
57 2.22 (2019-03-04) gif fixes, fix warnings
+
58 2.21 (2019-02-25) fix typo in comment
+
59 2.20 (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs
+
60 2.19 (2018-02-11) fix warning
+
61 2.18 (2018-01-30) fix warnings
+
62 2.17 (2018-01-29) bugfix, 1-bit BMP, 16-bitness query, fix warnings
+
63 2.16 (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes
+
64 2.15 (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC
+
65 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
+
66 2.13 (2016-12-04) experimental 16-bit API, only for PNG so far; fixes
+
67 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
+
68 2.11 (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64
+
69 RGB-format JPEG; remove white matting in PSD;
+
70 allocate large structures on the stack;
+
71 correct channel count for PNG & BMP
+
72 2.10 (2016-01-22) avoid warning introduced in 2.09
+
73 2.09 (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED
+
74
+
75 See end of file for full revision history.
+
76
+
77
+
78 ============================ Contributors =========================
+
79
+
80 Image formats Extensions, features
+
81 Sean Barrett (jpeg, png, bmp) Jetro Lauha (stbi_info)
+
82 Nicolas Schulz (hdr, psd) Martin "SpartanJ" Golini (stbi_info)
+
83 Jonathan Dummer (tga) James "moose2000" Brown (iPhone PNG)
+
84 Jean-Marc Lienher (gif) Ben "Disch" Wenger (io callbacks)
+
85 Tom Seddon (pic) Omar Cornut (1/2/4-bit PNG)
+
86 Thatcher Ulrich (psd) Nicolas Guillemot (vertical flip)
+
87 Ken Miller (pgm, ppm) Richard Mitton (16-bit PSD)
+
88 github:urraka (animated gif) Junggon Kim (PNM comments)
+
89 Christopher Forseth (animated gif) Daniel Gibson (16-bit TGA)
+
90 socks-the-fox (16-bit PNG)
+
91 Jeremy Sawicki (handle all ImageNet JPGs)
+
92 Optimizations & bugfixes Mikhail Morozov (1-bit BMP)
+
93 Fabian "ryg" Giesen Anael Seghezzi (is-16-bit query)
+
94 Arseny Kapoulkine Simon Breuss (16-bit PNM)
+
95 John-Mark Allen
+
96 Carmelo J Fdez-Aguera
+
97
+
98 Bug & warning fixes
+
99 Marc LeBlanc David Woo Guillaume George Martins Mozeiko
+
100 Christpher Lloyd Jerry Jansson Joseph Thomson Blazej Dariusz Roszkowski
+
101 Phil Jordan Dave Moore Roy Eltham
+
102 Hayaki Saito Nathan Reed Won Chun
+
103 Luke Graham Johan Duparc Nick Verigakis the Horde3D community
+
104 Thomas Ruf Ronny Chevalier github:rlyeh
+
105 Janez Zemva John Bartholomew Michal Cichon github:romigrou
+
106 Jonathan Blow Ken Hamada Tero Hanninen github:svdijk
+
107 Eugene Golushkov Laurent Gomila Cort Stratton github:snagar
+
108 Aruelien Pocheville Sergio Gonzalez Thibault Reuille github:Zelex
+
109 Cass Everitt Ryamond Barbiero github:grim210
+
110 Paul Du Bois Engin Manap Aldo Culquicondor github:sammyhw
+
111 Philipp Wiesemann Dale Weiler Oriol Ferrer Mesia github:phprus
+
112 Josh Tobin Neil Bickford Matthew Gregan github:poppolopoppo
+
113 Julian Raschke Gregory Mullen Christian Floisand github:darealshinji
+
114 Baldur Karlsson Kevin Schmidt JR Smith github:Michaelangel007
+
115 Brad Weinberger Matvey Cherevko github:mosra
+
116 Luca Sas Alexander Veselov Zack Middleton [reserved]
+
117 Ryan C. Gordon [reserved] [reserved]
+
118 DO NOT ADD YOUR NAME HERE
+
119
+
120 Jacko Dirks
+
121
+
122 To add your name to the credits, pick a random blank space in the middle and fill it.
+
123 80% of merge conflicts on stb PRs are due to people adding their name at the end
+
124 of the credits.
+
125*/
+
126
+
127#ifndef STBI_INCLUDE_STB_IMAGE_H
+
128#define STBI_INCLUDE_STB_IMAGE_H
+
129
+
130// DOCUMENTATION
+
131//
+
132// Limitations:
+
133// - no 12-bit-per-channel JPEG
+
134// - no JPEGs with arithmetic coding
+
135// - GIF always returns *comp=4
+
136//
+
137// Basic usage (see HDR discussion below for HDR usage):
+
138// int x,y,n;
+
139// unsigned char *data = stbi_load(filename, &x, &y, &n, 0);
+
140// // ... process data if not NULL ...
+
141// // ... x = width, y = height, n = # 8-bit components per pixel ...
+
142// // ... replace '0' with '1'..'4' to force that many components per pixel
+
143// // ... but 'n' will always be the number that it would have been if you said 0
+
144// stbi_image_free(data);
+
145//
+
146// Standard parameters:
+
147// int *x -- outputs image width in pixels
+
148// int *y -- outputs image height in pixels
+
149// int *channels_in_file -- outputs # of image components in image file
+
150// int desired_channels -- if non-zero, # of image components requested in result
+
151//
+
152// The return value from an image loader is an 'unsigned char *' which points
+
153// to the pixel data, or NULL on an allocation failure or if the image is
+
154// corrupt or invalid. The pixel data consists of *y scanlines of *x pixels,
+
155// with each pixel consisting of N interleaved 8-bit components; the first
+
156// pixel pointed to is top-left-most in the image. There is no padding between
+
157// image scanlines or between pixels, regardless of format. The number of
+
158// components N is 'desired_channels' if desired_channels is non-zero, or
+
159// *channels_in_file otherwise. If desired_channels is non-zero,
+
160// *channels_in_file has the number of components that _would_ have been
+
161// output otherwise. E.g. if you set desired_channels to 4, you will always
+
162// get RGBA output, but you can check *channels_in_file to see if it's trivially
+
163// opaque because e.g. there were only 3 channels in the source image.
+
164//
+
165// An output image with N components has the following components interleaved
+
166// in this order in each pixel:
+
167//
+
168// N=#comp components
+
169// 1 grey
+
170// 2 grey, alpha
+
171// 3 red, green, blue
+
172// 4 red, green, blue, alpha
+
173//
+
174// If image loading fails for any reason, the return value will be NULL,
+
175// and *x, *y, *channels_in_file will be unchanged. The function
+
176// stbi_failure_reason() can be queried for an extremely brief, end-user
+
177// unfriendly explanation of why the load failed. Define STBI_NO_FAILURE_STRINGS
+
178// to avoid compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly
+
179// more user-friendly ones.
+
180//
+
181// Paletted PNG, BMP, GIF, and PIC images are automatically depalettized.
+
182//
+
183// To query the width, height and component count of an image without having to
+
184// decode the full file, you can use the stbi_info family of functions:
+
185//
+
186// int x,y,n,ok;
+
187// ok = stbi_info(filename, &x, &y, &n);
+
188// // returns ok=1 and sets x, y, n if image is a supported format,
+
189// // 0 otherwise.
+
190//
+
191// Note that stb_image pervasively uses ints in its public API for sizes,
+
192// including sizes of memory buffers. This is now part of the API and thus
+
193// hard to change without causing breakage. As a result, the various image
+
194// loaders all have certain limits on image size; these differ somewhat
+
195// by format but generally boil down to either just under 2GB or just under
+
196// 1GB. When the decoded image would be larger than this, stb_image decoding
+
197// will fail.
+
198//
+
199// Additionally, stb_image will reject image files that have any of their
+
200// dimensions set to a larger value than the configurable STBI_MAX_DIMENSIONS,
+
201// which defaults to 2**24 = 16777216 pixels. Due to the above memory limit,
+
202// the only way to have an image with such dimensions load correctly
+
203// is for it to have a rather extreme aspect ratio. Either way, the
+
204// assumption here is that such larger images are likely to be malformed
+
205// or malicious. If you do need to load an image with individual dimensions
+
206// larger than that, and it still fits in the overall size limit, you can
+
207// #define STBI_MAX_DIMENSIONS on your own to be something larger.
+
208//
+
209// ===========================================================================
+
210//
+
211// UNICODE:
+
212//
+
213// If compiling for Windows and you wish to use Unicode filenames, compile
+
214// with
+
215// #define STBI_WINDOWS_UTF8
+
216// and pass utf8-encoded filenames. Call stbi_convert_wchar_to_utf8 to convert
+
217// Windows wchar_t filenames to utf8.
+
218//
+
219// ===========================================================================
+
220//
+
221// Philosophy
+
222//
+
223// stb libraries are designed with the following priorities:
+
224//
+
225// 1. easy to use
+
226// 2. easy to maintain
+
227// 3. good performance
+
228//
+
229// Sometimes I let "good performance" creep up in priority over "easy to maintain",
+
230// and for best performance I may provide less-easy-to-use APIs that give higher
+
231// performance, in addition to the easy-to-use ones. Nevertheless, it's important
+
232// to keep in mind that from the standpoint of you, a client of this library,
+
233// all you care about is #1 and #3, and stb libraries DO NOT emphasize #3 above all.
+
234//
+
235// Some secondary priorities arise directly from the first two, some of which
+
236// provide more explicit reasons why performance can't be emphasized.
+
237//
+
238// - Portable ("ease of use")
+
239// - Small source code footprint ("easy to maintain")
+
240// - No dependencies ("ease of use")
+
241//
+
242// ===========================================================================
+
243//
+
244// I/O callbacks
+
245//
+
246// I/O callbacks allow you to read from arbitrary sources, like packaged
+
247// files or some other source. Data read from callbacks are processed
+
248// through a small internal buffer (currently 128 bytes) to try to reduce
+
249// overhead.
+
250//
+
251// The three functions you must define are "read" (reads some bytes of data),
+
252// "skip" (skips some bytes of data), "eof" (reports if the stream is at the end).
+
253//
+
254// ===========================================================================
+
255//
+
256// SIMD support
+
257//
+
258// The JPEG decoder will try to automatically use SIMD kernels on x86 when
+
259// supported by the compiler. For ARM Neon support, you must explicitly
+
260// request it.
+
261//
+
262// (The old do-it-yourself SIMD API is no longer supported in the current
+
263// code.)
+
264//
+
265// On x86, SSE2 will automatically be used when available based on a run-time
+
266// test; if not, the generic C versions are used as a fall-back. On ARM targets,
+
267// the typical path is to have separate builds for NEON and non-NEON devices
+
268// (at least this is true for iOS and Android). Therefore, the NEON support is
+
269// toggled by a build flag: define STBI_NEON to get NEON loops.
+
270//
+
271// If for some reason you do not want to use any of SIMD code, or if
+
272// you have issues compiling it, you can disable it entirely by
+
273// defining STBI_NO_SIMD.
+
274//
+
275// ===========================================================================
+
276//
+
277// HDR image support (disable by defining STBI_NO_HDR)
+
278//
+
279// stb_image supports loading HDR images in general, and currently the Radiance
+
280// .HDR file format specifically. You can still load any file through the existing
+
281// interface; if you attempt to load an HDR file, it will be automatically remapped
+
282// to LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1;
+
283// both of these constants can be reconfigured through this interface:
+
284//
+
285// stbi_hdr_to_ldr_gamma(2.2f);
+
286// stbi_hdr_to_ldr_scale(1.0f);
+
287//
+
288// (note, do not use _inverse_ constants; stbi_image will invert them
+
289// appropriately).
+
290//
+
291// Additionally, there is a new, parallel interface for loading files as
+
292// (linear) floats to preserve the full dynamic range:
+
293//
+
294// float *data = stbi_loadf(filename, &x, &y, &n, 0);
+
295//
+
296// If you load LDR images through this interface, those images will
+
297// be promoted to floating point values, run through the inverse of
+
298// constants corresponding to the above:
+
299//
+
300// stbi_ldr_to_hdr_scale(1.0f);
+
301// stbi_ldr_to_hdr_gamma(2.2f);
+
302//
+
303// Finally, given a filename (or an open file or memory block--see header
+
304// file for details) containing image data, you can query for the "most
+
305// appropriate" interface to use (that is, whether the image is HDR or
+
306// not), using:
+
307//
+
308// stbi_is_hdr(char *filename);
+
309//
+
310// ===========================================================================
+
311//
+
312// iPhone PNG support:
+
313//
+
314// We optionally support converting iPhone-formatted PNGs (which store
+
315// premultiplied BGRA) back to RGB, even though they're internally encoded
+
316// differently. To enable this conversion, call
+
317// stbi_convert_iphone_png_to_rgb(1).
+
318//
+
319// Call stbi_set_unpremultiply_on_load(1) as well to force a divide per
+
320// pixel to remove any premultiplied alpha *only* if the image file explicitly
+
321// says there's premultiplied data (currently only happens in iPhone images,
+
322// and only if iPhone convert-to-rgb processing is on).
+
323//
+
324// ===========================================================================
+
325//
+
326// ADDITIONAL CONFIGURATION
+
327//
+
328// - You can suppress implementation of any of the decoders to reduce
+
329// your code footprint by #defining one or more of the following
+
330// symbols before creating the implementation.
+
331//
+
332// STBI_NO_JPEG
+
333// STBI_NO_PNG
+
334// STBI_NO_BMP
+
335// STBI_NO_PSD
+
336// STBI_NO_TGA
+
337// STBI_NO_GIF
+
338// STBI_NO_HDR
+
339// STBI_NO_PIC
+
340// STBI_NO_PNM (.ppm and .pgm)
+
341//
+
342// - You can request *only* certain decoders and suppress all other ones
+
343// (this will be more forward-compatible, as addition of new decoders
+
344// doesn't require you to disable them explicitly):
+
345//
+
346// STBI_ONLY_JPEG
+
347// STBI_ONLY_PNG
+
348// STBI_ONLY_BMP
+
349// STBI_ONLY_PSD
+
350// STBI_ONLY_TGA
+
351// STBI_ONLY_GIF
+
352// STBI_ONLY_HDR
+
353// STBI_ONLY_PIC
+
354// STBI_ONLY_PNM (.ppm and .pgm)
+
355//
+
356// - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still
+
357// want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB
+
358//
+
359// - If you define STBI_MAX_DIMENSIONS, stb_image will reject images greater
+
360// than that size (in either width or height) without further processing.
+
361// This is to let programs in the wild set an upper bound to prevent
+
362// denial-of-service attacks on untrusted data, as one could generate a
+
363// valid image of gigantic dimensions and force stb_image to allocate a
+
364// huge block of memory and spend disproportionate time decoding it. By
+
365// default this is set to (1 << 24), which is 16777216, but that's still
+
366// very big.
+
367
+
368#ifndef STBI_NO_STDIO
+
369#include <stdio.h>
+
370#endif // STBI_NO_STDIO
+
371
+
372#define STBI_VERSION 1
+
373
+
374enum {
+
375 STBI_default = 0, // only used for desired_channels
+
376
+
377 STBI_grey = 1,
+
378 STBI_grey_alpha = 2,
+
379 STBI_rgb = 3,
+
380 STBI_rgb_alpha = 4
+
381};
+
382
+
383#include <stdlib.h>
+
384typedef unsigned char stbi_uc;
+
385typedef unsigned short stbi_us;
+
386
+
387#ifdef __cplusplus
+
388extern "C" {
+
389#endif
+
390
+
391#ifndef STBIDEF
+
392#ifdef STB_IMAGE_STATIC
+
393#define STBIDEF static
+
394#else
+
395#define STBIDEF extern
+
396#endif
+
397#endif
+
398
+
400//
+
401// PRIMARY API - works on images of any type
+
402//
+
403
+
404//
+
405// load image by filename, open file, or memory buffer
+
406//
+
407
+
+
408typedef struct {
+
409 int (*read)(void * user, char * data,
+
410 int size); // fill 'data' with 'size' bytes. return number of bytes actually read
+
411 void (*skip)(void * user, int n); // skip the next 'n' bytes, or 'unget' the last -n bytes if negative
+
412 int (*eof)(void * user); // returns nonzero if we are at end of file/data
+ +
+
414
+
416//
+
417// 8-bits-per-channel interface
+
418//
+
419
+
420STBIDEF stbi_uc * stbi_load_from_memory(stbi_uc const * buffer, int len, int * x, int * y, int * channels_in_file,
+
421 int desired_channels);
+
422STBIDEF stbi_uc * stbi_load_from_callbacks(stbi_io_callbacks const * clbk, void * user, int * x, int * y,
+ +
424
+
425#ifndef STBI_NO_STDIO
+
426STBIDEF stbi_uc * stbi_load(char const * filename, int * x, int * y, int * channels_in_file, int desired_channels);
+
427STBIDEF stbi_uc * stbi_load_from_file(FILE * f, int * x, int * y, int * channels_in_file, int desired_channels);
+
428// for stbi_load_from_file, file pointer is left pointing immediately after image
+
429#endif
+
430
+
431#ifndef STBI_NO_GIF
+
432STBIDEF stbi_uc * stbi_load_gif_from_memory(stbi_uc const * buffer, int len, int ** delays, int * x, int * y, int * z,
+
433 int * comp, int req_comp);
+
434#endif
+
435
+
436#ifdef STBI_WINDOWS_UTF8
+
437STBIDEF int stbi_convert_wchar_to_utf8(char * buffer, size_t bufferlen, const wchar_t * input);
+
438#endif
+
439
+
441//
+
442// 16-bits-per-channel interface
+
443//
+
444
+
445STBIDEF stbi_us * stbi_load_16_from_memory(stbi_uc const * buffer, int len, int * x, int * y, int * channels_in_file,
+
446 int desired_channels);
+
447STBIDEF stbi_us * stbi_load_16_from_callbacks(stbi_io_callbacks const * clbk, void * user, int * x, int * y,
+ +
449
+
450#ifndef STBI_NO_STDIO
+
451STBIDEF stbi_us * stbi_load_16(char const * filename, int * x, int * y, int * channels_in_file, int desired_channels);
+
452STBIDEF stbi_us * stbi_load_from_file_16(FILE * f, int * x, int * y, int * channels_in_file, int desired_channels);
+
453#endif
+
454
+
456//
+
457// float-per-channel interface
+
458//
+
459#ifndef STBI_NO_LINEAR
+
460STBIDEF float * stbi_loadf_from_memory(stbi_uc const * buffer, int len, int * x, int * y, int * channels_in_file,
+
461 int desired_channels);
+
462STBIDEF float * stbi_loadf_from_callbacks(stbi_io_callbacks const * clbk, void * user, int * x, int * y, int * channels_in_file,
+
463 int desired_channels);
+
464
+
465#ifndef STBI_NO_STDIO
+
466STBIDEF float * stbi_loadf(char const * filename, int * x, int * y, int * channels_in_file, int desired_channels);
+
467STBIDEF float * stbi_loadf_from_file(FILE * f, int * x, int * y, int * channels_in_file, int desired_channels);
+
468#endif
+
469#endif
+
470
+
471#ifndef STBI_NO_HDR
+
472STBIDEF void stbi_hdr_to_ldr_gamma(float gamma);
+
473STBIDEF void stbi_hdr_to_ldr_scale(float scale);
+
474#endif // STBI_NO_HDR
+
475
+
476#ifndef STBI_NO_LINEAR
+
477STBIDEF void stbi_ldr_to_hdr_gamma(float gamma);
+
478STBIDEF void stbi_ldr_to_hdr_scale(float scale);
+
479#endif // STBI_NO_LINEAR
+
480
+
481// stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR
+
482STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const * clbk, void * user);
+
483STBIDEF int stbi_is_hdr_from_memory(stbi_uc const * buffer, int len);
+
484#ifndef STBI_NO_STDIO
+
485STBIDEF int stbi_is_hdr(char const * filename);
+
486STBIDEF int stbi_is_hdr_from_file(FILE * f);
+
487#endif // STBI_NO_STDIO
+
488
+
489// get a VERY brief reason for failure
+
490// on most compilers (and ALL modern mainstream compilers) this is threadsafe
+
491STBIDEF const char * stbi_failure_reason(void);
+
492
+
493// free the loaded image -- this is just free()
+
494STBIDEF void stbi_image_free(void * retval_from_stbi_load);
+
495
+
496// get image dimensions & components without fully decoding
+
497STBIDEF int stbi_info_from_memory(stbi_uc const * buffer, int len, int * x, int * y, int * comp);
+
498STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const * clbk, void * user, int * x, int * y, int * comp);
+
499STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const * buffer, int len);
+
500STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const * clbk, void * user);
+
501
+
502#ifndef STBI_NO_STDIO
+
503STBIDEF int stbi_info(char const * filename, int * x, int * y, int * comp);
+
504STBIDEF int stbi_info_from_file(FILE * f, int * x, int * y, int * comp);
+
505STBIDEF int stbi_is_16_bit(char const * filename);
+
506STBIDEF int stbi_is_16_bit_from_file(FILE * f);
+
507#endif
+
508
+
509// for image formats that explicitly notate that they have premultiplied alpha,
+
510// we just return the colors as stored in the file. set this flag to force
+
511// unpremultiplication. results are undefined if the unpremultiply overflows.
+
512STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply);
+
513
+
514// indicate whether we should process iphone images back to canonical format,
+
515// or just pass them through "as-is"
+
516STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert);
+
517
+
518// flip the image vertically, so the first pixel in the output array is the bottom left
+
519STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip);
+
520
+
521// as above, but only applies to images loaded on the thread that calls the function
+
522// this function is only available if your compiler supports thread-local variables;
+
523// calling it will fail to link if your compiler doesn't
+
524STBIDEF void stbi_set_unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply);
+
525STBIDEF void stbi_convert_iphone_png_to_rgb_thread(int flag_true_if_should_convert);
+
526STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip);
+
527
+
528// ZLIB client - used by PNG, available for other purposes
+
529
+
530STBIDEF char * stbi_zlib_decode_malloc_guesssize(const char * buffer, int len, int initial_size, int * outlen);
+
531STBIDEF char * stbi_zlib_decode_malloc_guesssize_headerflag(const char * buffer, int len, int initial_size, int * outlen,
+
532 int parse_header);
+
533STBIDEF char * stbi_zlib_decode_malloc(const char * buffer, int len, int * outlen);
+
534STBIDEF int stbi_zlib_decode_buffer(char * obuffer, int olen, const char * ibuffer, int ilen);
+
535
+
536STBIDEF char * stbi_zlib_decode_noheader_malloc(const char * buffer, int len, int * outlen);
+
537STBIDEF int stbi_zlib_decode_noheader_buffer(char * obuffer, int olen, const char * ibuffer, int ilen);
+
538
+
539#ifdef __cplusplus
+
540}
+
541#endif
+
542
+
543//
+
544//
+
546#endif // STBI_INCLUDE_STB_IMAGE_H
+
547
+
548#ifdef STB_IMAGE_IMPLEMENTATION
+
549
+
550#if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) || defined(STBI_ONLY_TGA) || \
+
551 defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || \
+
552 defined(STBI_ONLY_PNM) || defined(STBI_ONLY_ZLIB)
+
553#ifndef STBI_ONLY_JPEG
+
554#define STBI_NO_JPEG
+
555#endif
+
556#ifndef STBI_ONLY_PNG
+
557#define STBI_NO_PNG
+
558#endif
+
559#ifndef STBI_ONLY_BMP
+
560#define STBI_NO_BMP
+
561#endif
+
562#ifndef STBI_ONLY_PSD
+
563#define STBI_NO_PSD
+
564#endif
+
565#ifndef STBI_ONLY_TGA
+
566#define STBI_NO_TGA
+
567#endif
+
568#ifndef STBI_ONLY_GIF
+
569#define STBI_NO_GIF
+
570#endif
+
571#ifndef STBI_ONLY_HDR
+
572#define STBI_NO_HDR
+
573#endif
+
574#ifndef STBI_ONLY_PIC
+
575#define STBI_NO_PIC
+
576#endif
+
577#ifndef STBI_ONLY_PNM
+
578#define STBI_NO_PNM
+
579#endif
+
580#endif
+
581
+
582#if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB)
+
583#define STBI_NO_ZLIB
+
584#endif
+
585
+
586#include <limits.h>
+
587#include <stdarg.h>
+
588#include <stddef.h> // ptrdiff_t on osx
+
589#include <stdlib.h>
+
590#include <string.h>
+
591
+
592#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
+
593#include <math.h> // ldexp, pow
+
594#endif
+
595
+
596#ifndef STBI_NO_STDIO
+
597#include <stdio.h>
+
598#endif
+
599
+
600#ifndef STBI_ASSERT
+
601#include <assert.h>
+
602#define STBI_ASSERT(x) assert(x)
+
603#endif
+
604
+
605#ifdef __cplusplus
+
606#define STBI_EXTERN extern "C"
+
607#else
+
608#define STBI_EXTERN extern
+
609#endif
+
610
+
611#ifndef _MSC_VER
+
612#ifdef __cplusplus
+
613#define stbi_inline inline
+
614#else
+
615#define stbi_inline
+
616#endif
+
617#else
+
618#define stbi_inline __forceinline
+
619#endif
+
620
+
621#ifndef STBI_NO_THREAD_LOCALS
+
622#if defined(__cplusplus) && __cplusplus >= 201103L
+
623#define STBI_THREAD_LOCAL thread_local
+
624#elif defined(__GNUC__) && __GNUC__ < 5
+
625#define STBI_THREAD_LOCAL __thread
+
626#elif defined(_MSC_VER)
+
627#define STBI_THREAD_LOCAL __declspec(thread)
+
628#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_THREADS__)
+
629#define STBI_THREAD_LOCAL _Thread_local
+
630#endif
+
631
+
632#ifndef STBI_THREAD_LOCAL
+
633#if defined(__GNUC__)
+
634#define STBI_THREAD_LOCAL __thread
+
635#endif
+
636#endif
+
637#endif
+
638
+
639#if defined(_MSC_VER) || defined(__SYMBIAN32__)
+
640typedef unsigned short stbi__uint16;
+
641typedef signed short stbi__int16;
+
642typedef unsigned int stbi__uint32;
+
643typedef signed int stbi__int32;
+
644#else
+
645#include <stdint.h>
+
646typedef uint16_t stbi__uint16;
+
647typedef int16_t stbi__int16;
+
648typedef uint32_t stbi__uint32;
+
649typedef int32_t stbi__int32;
+
650#endif
+
651
+
652// should produce compiler error if size is wrong
+
653typedef unsigned char validate_uint32[sizeof(stbi__uint32) == 4 ? 1 : -1];
+
654
+
655#ifdef _MSC_VER
+
656#define STBI_NOTUSED(v) (void)(v)
+
657#else
+
658#define STBI_NOTUSED(v) (void)sizeof(v)
+
659#endif
+
660
+
661#ifdef _MSC_VER
+
662#define STBI_HAS_LROTL
+
663#endif
+
664
+
665#ifdef STBI_HAS_LROTL
+
666#define stbi_lrot(x, y) _lrotl(x, y)
+
667#else
+
668#define stbi_lrot(x, y) (((x) << (y)) | ((x) >> (-(y)&31)))
+
669#endif
+
670
+
671#if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED))
+
672// ok
+
673#elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED)
+
674// ok
+
675#else
+
676#error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)."
+
677#endif
+
678
+
679#ifndef STBI_MALLOC
+
680#define STBI_MALLOC(sz) malloc(sz)
+
681#define STBI_REALLOC(p, newsz) realloc(p, newsz)
+
682#define STBI_FREE(p) free(p)
+
683#endif
+
684
+
685#ifndef STBI_REALLOC_SIZED
+
686#define STBI_REALLOC_SIZED(p, oldsz, newsz) STBI_REALLOC(p, newsz)
+
687#endif
+
688
+
689// x86/x64 detection
+
690#if defined(__x86_64__) || defined(_M_X64)
+
691#define STBI__X64_TARGET
+
692#elif defined(__i386) || defined(_M_IX86)
+
693#define STBI__X86_TARGET
+
694#endif
+
695
+
696#if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD)
+
697// gcc doesn't support sse2 intrinsics unless you compile with -msse2,
+
698// which in turn means it gets to use SSE2 everywhere. This is unfortunate,
+
699// but previous attempts to provide the SSE2 functions with runtime
+
700// detection caused numerous issues. The way architecture extensions are
+
701// exposed in GCC/Clang is, sadly, not really suited for one-file libs.
+
702// New behavior: if compiled with -msse2, we use SSE2 without any
+
703// detection; if not, we don't use it at all.
+
704#define STBI_NO_SIMD
+
705#endif
+
706
+
707#if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD)
+
708// Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET
+
709//
+
710// 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the
+
711// Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant.
+
712// As a result, enabling SSE2 on 32-bit MinGW is dangerous when not
+
713// simultaneously enabling "-mstackrealign".
+
714//
+
715// See https://github.com/nothings/stb/issues/81 for more information.
+
716//
+
717// So default to no SSE2 on 32-bit MinGW. If you've read this far and added
+
718// -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2.
+
719#define STBI_NO_SIMD
+
720#endif
+
721
+
722#if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET))
+
723#define STBI_SSE2
+
724#include <emmintrin.h>
+
725
+
726#ifdef _MSC_VER
+
727
+
728#if _MSC_VER >= 1400 // not VC6
+
729#include <intrin.h> // __cpuid
+
// Runs CPUID with function id 1 through the __cpuid intrinsic and returns
// the last of the four values it stores (the sibling inline-asm variant
// returns EDX for the same query).
static int stbi__cpuid3(void) {
    int regs[4];
    __cpuid(regs, 1);
    return regs[3];
}
+
735#else
+
736static int stbi__cpuid3(void) {
+
737 int res;
+
738 __asm {
+
739 mov eax,1
+
740 cpuid
+
741 mov res,edx
+
742 }
+
743 return res;
+
744}
+
745#endif
+
746
+
747#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
+
748
+
749#if !defined(STBI_NO_JPEG) && defined(STBI_SSE2)
+
// Returns 1 when bit 26 of the value reported by stbi__cpuid3() is set,
// 0 otherwise.
static int stbi__sse2_available(void) {
    const int feature_bit = (stbi__cpuid3() >> 26) & 1;
    return feature_bit != 0;
}
+
754#endif
+
755
+
756#else // assume GCC-style if not VC++
+
757#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
+
758
+
759#if !defined(STBI_NO_JPEG) && defined(STBI_SSE2)
+
// Always reports SSE2 as available.
static int stbi__sse2_available(void) {
    // Reaching this definition on GCC/Clang implies the build was made with
    // -msse2, so the compiler itself may already emit SSE2 instructions
    // anywhere; there is nothing left to detect at run time.
    return 1;
}
+
766#endif
+
767
+
768#endif
+
769#endif
+
770
+
771// ARM NEON
+
772#if defined(STBI_NO_SIMD) && defined(STBI_NEON)
+
773#undef STBI_NEON
+
774#endif
+
775
+
776#ifdef STBI_NEON
+
777#include <arm_neon.h>
+
778#ifdef _MSC_VER
+
779#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
+
780#else
+
781#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
+
782#endif
+
783#endif
+
784
+
785#ifndef STBI_SIMD_ALIGN
+
786#define STBI_SIMD_ALIGN(type, name) type name
+
787#endif
+
788
+
789#ifndef STBI_MAX_DIMENSIONS
+
790#define STBI_MAX_DIMENSIONS (1 << 24)
+
791#endif
+
792
+
794//
+
795// stbi__context struct and start_xxx functions
+
796
+
797// stbi__context structure is our basic context used by all images, so it
+
798// contains all the IO context, plus some basic image information
+
799typedef struct {
+ +
801 int img_n, img_out_n;
+
802
+ +
804 void * io_user_data;
+
805
+ +
807 int buflen;
+ + +
810
+ + + +
814
+
815static void stbi__refill_buffer(stbi__context * s);
+
816
+
817// initialize a memory-decode context
+
818static void stbi__start_mem(stbi__context * s, stbi_uc const * buffer, int len) {
+
819 s->io.read = NULL;
+
820 s->read_from_callbacks = 0;
+
821 s->callback_already_read = 0;
+
822 s->img_buffer = s->img_buffer_original = (stbi_uc *)buffer;
+
823 s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *)buffer + len;
+
824}
+
825
+
826// initialize a callback-based context
+ +
828 s->io = *c;
+
829 s->io_user_data = user;
+
830 s->buflen = sizeof(s->buffer_start);
+
831 s->read_from_callbacks = 1;
+
832 s->callback_already_read = 0;
+
833 s->img_buffer = s->img_buffer_original = s->buffer_start;
+ +
835 s->img_buffer_original_end = s->img_buffer_end;
+
836}
+
837
+
838#ifndef STBI_NO_STDIO
+
839
+
// Read callback for stdio streams: 'user' is the FILE *. Reads up to 'size'
// bytes into 'data' and returns the number of bytes fread() delivered.
static int stbi__stdio_read(void * user, char * data, int size) {
    FILE * stream = (FILE *)user;
    size_t got = fread(data, 1, size, stream);
    return (int)got;
}
+
841
+
// Skip callback for stdio streams: 'user' is the FILE *. Moves the file
// position by 'n' bytes relative to the current position.
static void stbi__stdio_skip(void * user, int n) {
    FILE * stream = (FILE *)user;
    int probe;

    fseek(stream, n, SEEK_CUR);

    /* have to read a byte to reset feof()'s flag */
    probe = fgetc(stream);
    if (probe == EOF)
        return;

    /* the byte was valid, so push it back onto the stream */
    ungetc(probe, stream);
}
+
850
+
// EOF callback for stdio streams: 'user' is the FILE *. Returns 1 when the
// stream is at end-of-file or has its error indicator set, 0 otherwise.
static int stbi__stdio_eof(void * user) {
    FILE * stream = (FILE *)user;
    if (feof(stream))
        return 1;
    return ferror(stream) != 0;
}
+
852
+ + + + +
857};
+
858
+ +
860
+
861// static void stop_file(stbi__context *s) { }
+
862
+
863#endif // !STBI_NO_STDIO
+
864
+
865static void stbi__rewind(stbi__context * s) {
+
866 // conceptually rewind SHOULD rewind to the beginning of the stream,
+
867 // but we just rewind to the beginning of the initial buffer, because
+
868 // we only use it after doing 'test', which only ever looks at at most 92 bytes
+
869 s->img_buffer = s->img_buffer_original;
+
870 s->img_buffer_end = s->img_buffer_original_end;
+
871}
+
872
+ +
874
+
875typedef struct {
+ +
877 int num_channels;
+
878 int channel_order;
+ +
880
+
881#ifndef STBI_NO_JPEG
+
882static int stbi__jpeg_test(stbi__context * s);
+
883static void * stbi__jpeg_load(stbi__context * s, int * x, int * y, int * comp, int req_comp, stbi__result_info * ri);
+
884static int stbi__jpeg_info(stbi__context * s, int * x, int * y, int * comp);
+
885#endif
+
886
+
887#ifndef STBI_NO_PNG
+
888static int stbi__png_test(stbi__context * s);
+
889static void * stbi__png_load(stbi__context * s, int * x, int * y, int * comp, int req_comp, stbi__result_info * ri);
+
890static int stbi__png_info(stbi__context * s, int * x, int * y, int * comp);
+
891static int stbi__png_is16(stbi__context * s);
+
892#endif
+
893
+
894#ifndef STBI_NO_BMP
+
895static int stbi__bmp_test(stbi__context * s);
+
896static void * stbi__bmp_load(stbi__context * s, int * x, int * y, int * comp, int req_comp, stbi__result_info * ri);
+
897static int stbi__bmp_info(stbi__context * s, int * x, int * y, int * comp);
+
898#endif
+
899
+
900#ifndef STBI_NO_TGA
+
901static int stbi__tga_test(stbi__context * s);
+
902static void * stbi__tga_load(stbi__context * s, int * x, int * y, int * comp, int req_comp, stbi__result_info * ri);
+
903static int stbi__tga_info(stbi__context * s, int * x, int * y, int * comp);
+
904#endif
+
905
+
906#ifndef STBI_NO_PSD
+
907static int stbi__psd_test(stbi__context * s);
+
908static void * stbi__psd_load(stbi__context * s, int * x, int * y, int * comp, int req_comp, stbi__result_info * ri, int bpc);
+
909static int stbi__psd_info(stbi__context * s, int * x, int * y, int * comp);
+
910static int stbi__psd_is16(stbi__context * s);
+
911#endif
+
912
+
913#ifndef STBI_NO_HDR
+
914static int stbi__hdr_test(stbi__context * s);
+
915static float * stbi__hdr_load(stbi__context * s, int * x, int * y, int * comp, int req_comp, stbi__result_info * ri);
+
916static int stbi__hdr_info(stbi__context * s, int * x, int * y, int * comp);
+
917#endif
+
918
+
919#ifndef STBI_NO_PIC
+
920static int stbi__pic_test(stbi__context * s);
+
921static void * stbi__pic_load(stbi__context * s, int * x, int * y, int * comp, int req_comp, stbi__result_info * ri);
+
922static int stbi__pic_info(stbi__context * s, int * x, int * y, int * comp);
+
923#endif
+
924
+
925#ifndef STBI_NO_GIF
+
926static int stbi__gif_test(stbi__context * s);
+
927static void * stbi__gif_load(stbi__context * s, int * x, int * y, int * comp, int req_comp, stbi__result_info * ri);
+
928static void * stbi__load_gif_main(stbi__context * s, int ** delays, int * x, int * y, int * z, int * comp, int req_comp);
+
929static int stbi__gif_info(stbi__context * s, int * x, int * y, int * comp);
+
930#endif
+
931
+
932#ifndef STBI_NO_PNM
+
933static int stbi__pnm_test(stbi__context * s);
+
934static void * stbi__pnm_load(stbi__context * s, int * x, int * y, int * comp, int req_comp, stbi__result_info * ri);
+
935static int stbi__pnm_info(stbi__context * s, int * x, int * y, int * comp);
+
936static int stbi__pnm_is16(stbi__context * s);
+
937#endif
+
938
+
939static
+
940#ifdef STBI_THREAD_LOCAL
+ +
942#endif
+
943 const char * stbi__g_failure_reason;
+
944
+
945STBIDEF const char * stbi_failure_reason(void) { return stbi__g_failure_reason; }
+
946
+
947#ifndef STBI_NO_FAILURE_STRINGS
+
948static int stbi__err(const char * str) {
+ +
950 return 0;
+
951}
+
952#endif
+
953
+
// Allocates 'size' bytes through the STBI_MALLOC macro and returns the result.
static void * stbi__malloc(size_t size) {
    void * block = STBI_MALLOC(size);
    return block;
}
+
955
+
956// stb_image uses ints pervasively, including for offset calculations.
+
957// therefore the largest decoded image size we can support with the
+
958// current code, even on 64-bit targets, is INT_MAX. this is not a
+
959// significant limitation for the intended use case.
+
960//
+
961// we do, however, need to make sure our size calculations don't
+
962// overflow. hence a few helper functions for size calculations that
+
963// multiply integers together, making sure that they're non-negative
+
964// and no overflow occurs.
+
965
+
// returns 1 if "a + b" is a valid size, 0 on overflow.
// negative terms are considered invalid: either operand being negative
// yields 0.
static int stbi__addsizes_valid(int a, int b) {
    // reject negative terms up front; a negative 'a' would otherwise satisfy
    // the range comparison below and be reported as valid
    if (a < 0 || b < 0)
        return 0;
    // now 0 <= b <= INT_MAX, hence also
    // 0 <= INT_MAX - b <= INT_MAX.
    // And "a + b <= INT_MAX" (which might overflow) is the
    // same as a <= INT_MAX - b (no overflow)
    return a <= INT_MAX - b;
}
+
977
+
// returns 1 if "a * b" is a valid size, 0 on overflow.
// a negative factor always yields 0.
static int stbi__mul2sizes_valid(int a, int b) {
    const int both_non_negative = (a >= 0) && (b >= 0);
    if (!both_non_negative)
        return 0;
    // multiplying by 0 can never overflow; for any other b, comparing against
    // INT_MAX / b checks the product without ever computing it
    return (b == 0) ? 1 : (a <= INT_MAX / b);
}
+
988
+
989#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR)
+
// returns 1 if "a*b + add" has no negative terms/factors and doesn't overflow
static int stbi__mad2sizes_valid(int a, int b, int add) {
    if (!stbi__mul2sizes_valid(a, b))
        return 0;
    // a * b is only evaluated once the product is known not to overflow
    return stbi__addsizes_valid(a * b, add);
}
+
994#endif
+
995
+
996// returns 1 if "a*b*c + add" has no negative terms/factors and doesn't overflow
+
997static int stbi__mad3sizes_valid(int a, int b, int c, int add) {
+ +
999}
+
1000
+
1001// returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow
+
1002#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM)
+
1003static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add) {
+ +
1005 stbi__addsizes_valid(a * b * c * d, add);
+
1006}
+
1007#endif
+
1008
+
1009#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR)
+
// mallocs with size overflow checking
// Allocates "a*b + add" bytes, or returns NULL when stbi__mad2sizes_valid
// rejects that size.
static void * stbi__malloc_mad2(int a, int b, int add) {
    return stbi__mad2sizes_valid(a, b, add) ? stbi__malloc(a * b + add) : NULL;
}
+
1016#endif
+
1017
+
// Allocates "a*b*c + add" bytes, or returns NULL when stbi__mad3sizes_valid
// rejects that size.
static void * stbi__malloc_mad3(int a, int b, int c, int add) {
    return stbi__mad3sizes_valid(a, b, c, add) ? stbi__malloc(a * b * c + add) : NULL;
}
+
1023
+
1024#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM)
+
// Allocates "a*b*c*d + add" bytes, or returns NULL when stbi__mad4sizes_valid
// rejects that size.
static void * stbi__malloc_mad4(int a, int b, int c, int d, int add) {
    return stbi__mad4sizes_valid(a, b, c, d, add) ? stbi__malloc(a * b * c * d + add) : NULL;
}
+
1030#endif
+
1031
+
// returns 1 if a + b is representable as an int (INT_MIN..INT_MAX inclusive),
// 0 if the addition would overflow.
static int stbi__addints_valid(int a, int b) {
    const int a_negative = a < 0;
    const int b_negative = b < 0;

    // operands of opposite sign can never overflow
    if (a_negative != b_negative)
        return 1;

    // both negative: a + b >= INT_MIN rewritten as a >= INT_MIN - b, where
    // INT_MIN - b itself cannot overflow because b < 0
    if (a_negative)
        return a >= INT_MIN - b;

    // both non-negative: a + b <= INT_MAX rewritten without the addition
    return a <= INT_MAX - b;
}
+
1040
+
// returns 1 if the product of two signed shorts is valid, i.e. lies within
// SHRT_MIN..SHRT_MAX inclusive; returns 0 on overflow.
static int stbi__mul2shorts_valid(short a, short b) {
    // Evaluate the real product in a wider type and range-check it. With
    // 16-bit shorts its magnitude is at most 2^30, which a long (at least
    // 32 bits) holds exactly. Division-based comparisons are easy to get wrong
    // per sign combination: two negative operands need the comparison
    // reversed, and SHRT_MIN * -1 overflows even though b == -1 looks harmless.
    const long product = (long)a * (long)b;
    return product >= SHRT_MIN && product <= SHRT_MAX;
}
+
1051
+
1052// stbi__err - error
+
1053// stbi__errpf - error returning pointer to float
+
1054// stbi__errpuc - error returning pointer to unsigned char
+
1055
+
1056#ifdef STBI_NO_FAILURE_STRINGS
+
1057#define stbi__err(x, y) 0
+
1058#elif defined(STBI_FAILURE_USERMSG)
+
1059#define stbi__err(x, y) stbi__err(y)
+
1060#else
+
1061#define stbi__err(x, y) stbi__err(x)
+
1062#endif
+
1063
+
1064#define stbi__errpf(x, y) ((float *)(size_t)(stbi__err(x, y) ? NULL : NULL))
+
1065#define stbi__errpuc(x, y) ((unsigned char *)(size_t)(stbi__err(x, y) ? NULL : NULL))
+
1066
+
1067STBIDEF void stbi_image_free(void * retval_from_stbi_load) { STBI_FREE(retval_from_stbi_load); }
+
1068
+
1069#ifndef STBI_NO_LINEAR
+
1070static float * stbi__ldr_to_hdr(stbi_uc * data, int x, int y, int comp);
+
1071#endif
+
1072
+
1073#ifndef STBI_NO_HDR
+
1074static stbi_uc * stbi__hdr_to_ldr(float * data, int x, int y, int comp);
+
1075#endif
+
1076
+ +
1078
+
1079STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip) {
+ +
1081}
+
1082
+
1083#ifndef STBI_THREAD_LOCAL
+
1084#define stbi__vertically_flip_on_load stbi__vertically_flip_on_load_global
+
1085#else
+ +
1087
+
1088STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip) {
+ + +
1091}
+
1092
+
1093#define stbi__vertically_flip_on_load \
+
1094 (stbi__vertically_flip_on_load_set ? stbi__vertically_flip_on_load_local : stbi__vertically_flip_on_load_global)
+
1095#endif // STBI_THREAD_LOCAL
+
1096
+
1097static void * stbi__load_main(stbi__context * s, int * x, int * y, int * comp, int req_comp, stbi__result_info * ri, int bpc) {
+
1098 memset(ri, 0, sizeof(*ri)); // make sure it's initialized if we add new fields
+
1099 ri->bits_per_channel = 8; // default is 8 so most paths don't have to be changed
+
1100 ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order
+
1101 ri->num_channels = 0;
+
1102
+
1103// test the formats with a very explicit header first (at least a FOURCC
+
1104// or distinctive magic number first)
+
1105#ifndef STBI_NO_PNG
+
1106 if (stbi__png_test(s))
+
1107 return stbi__png_load(s, x, y, comp, req_comp, ri);
+
1108#endif
+
1109#ifndef STBI_NO_BMP
+
1110 if (stbi__bmp_test(s))
+
1111 return stbi__bmp_load(s, x, y, comp, req_comp, ri);
+
1112#endif
+
1113#ifndef STBI_NO_GIF
+
1114 if (stbi__gif_test(s))
+
1115 return stbi__gif_load(s, x, y, comp, req_comp, ri);
+
1116#endif
+
1117#ifndef STBI_NO_PSD
+
1118 if (stbi__psd_test(s))
+
1119 return stbi__psd_load(s, x, y, comp, req_comp, ri, bpc);
+
1120#else
+ +
1122#endif
+
1123#ifndef STBI_NO_PIC
+
1124 if (stbi__pic_test(s))
+
1125 return stbi__pic_load(s, x, y, comp, req_comp, ri);
+
1126#endif
+
1127
+
1128// then the formats that can end up attempting to load with just 1 or 2
+
1129// bytes matching expectations; these are prone to false positives, so
+
1130// try them later
+
1131#ifndef STBI_NO_JPEG
+
1132 if (stbi__jpeg_test(s))
+
1133 return stbi__jpeg_load(s, x, y, comp, req_comp, ri);
+
1134#endif
+
1135#ifndef STBI_NO_PNM
+
1136 if (stbi__pnm_test(s))
+
1137 return stbi__pnm_load(s, x, y, comp, req_comp, ri);
+
1138#endif
+
1139
+
1140#ifndef STBI_NO_HDR
+
1141 if (stbi__hdr_test(s)) {
+
1142 float * hdr = stbi__hdr_load(s, x, y, comp, req_comp, ri);
+
1143 return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
+
1144 }
+
1145#endif
+
1146
+
1147#ifndef STBI_NO_TGA
+
1148 // test tga last because it's a crappy test!
+
1149 if (stbi__tga_test(s))
+
1150 return stbi__tga_load(s, x, y, comp, req_comp, ri);
+
1151#endif
+
1152
+
1153 return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt");
+
1154}
+
1155
+
1156static stbi_uc * stbi__convert_16_to_8(stbi__uint16 * orig, int w, int h, int channels) {
+
1157 int i;
+
1158 int img_len = w * h * channels;
+
1159 stbi_uc * reduced;
+
1160
+ +
1162 if (reduced == NULL)
+
1163 return stbi__errpuc("outofmem", "Out of memory");
+
1164
+
1165 for (i = 0; i < img_len; ++i)
+
1166 reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is sufficient approx of 16->8 bit scaling
+
1167
+
1168 STBI_FREE(orig);
+
1169 return reduced;
+
1170}
+
1171
+
1172static stbi__uint16 * stbi__convert_8_to_16(stbi_uc * orig, int w, int h, int channels) {
+
1173 int i;
+
1174 int img_len = w * h * channels;
+ +
1176
+ +
1178 if (enlarged == NULL)
+
1179 return (stbi__uint16 *)stbi__errpuc("outofmem", "Out of memory");
+
1180
+
1181 for (i = 0; i < img_len; ++i)
+
1182 enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff
+
1183
+
1184 STBI_FREE(orig);
+
1185 return enlarged;
+
1186}
+
1187
+
1188static void stbi__vertical_flip(void * image, int w, int h, int bytes_per_pixel) {
+
1189 int row;
+ +
1191 stbi_uc temp[2048];
+
1192 stbi_uc * bytes = (stbi_uc *)image;
+
1193
+
1194 for (row = 0; row < (h >> 1); row++) {
+
1195 stbi_uc * row0 = bytes + row * bytes_per_row;
+
1196 stbi_uc * row1 = bytes + (h - row - 1) * bytes_per_row;
+
1197 // swap row0 with row1
+
1198 size_t bytes_left = bytes_per_row;
+
1199 while (bytes_left) {
+
1200 size_t bytes_copy = (bytes_left < sizeof(temp)) ? bytes_left : sizeof(temp);
+
1201 memcpy(temp, row0, bytes_copy);
+ +
1203 memcpy(row1, temp, bytes_copy);
+
1204 row0 += bytes_copy;
+
1205 row1 += bytes_copy;
+ +
1207 }
+
1208 }
+
1209}
+
1210
+
1211#ifndef STBI_NO_GIF
+
1212static void stbi__vertical_flip_slices(void * image, int w, int h, int z, int bytes_per_pixel) {
+
1213 int slice;
+
1214 int slice_size = w * h * bytes_per_pixel;
+
1215
+
1216 stbi_uc * bytes = (stbi_uc *)image;
+
1217 for (slice = 0; slice < z; ++slice) {
+ +
1219 bytes += slice_size;
+
1220 }
+
1221}
+
1222#endif
+
1223
+
1224static unsigned char * stbi__load_and_postprocess_8bit(stbi__context * s, int * x, int * y, int * comp, int req_comp) {
+ +
1226 void * result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8);
+
1227
+
1228 if (result == NULL)
+
1229 return NULL;
+
1230
+
1231 // it is the responsibility of the loaders to make sure we get either 8 or 16 bit.
+
1232 STBI_ASSERT(ri.bits_per_channel == 8 || ri.bits_per_channel == 16);
+
1233
+
1234 if (ri.bits_per_channel != 8) {
+ +
1236 ri.bits_per_channel = 8;
+
1237 }
+
1238
+
1239 // @TODO: move stbi__convert_format to here
+
1240
+ +
1242 int channels = req_comp ? req_comp : *comp;
+
1243 stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc));
+
1244 }
+
1245
+
1246 return (unsigned char *)result;
+
1247}
+
1248
+
1249static stbi__uint16 * stbi__load_and_postprocess_16bit(stbi__context * s, int * x, int * y, int * comp, int req_comp) {
+ +
1251 void * result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16);
+
1252
+
1253 if (result == NULL)
+
1254 return NULL;
+
1255
+
1256 // it is the responsibility of the loaders to make sure we get either 8 or 16 bit.
+
1257 STBI_ASSERT(ri.bits_per_channel == 8 || ri.bits_per_channel == 16);
+
1258
+
1259 if (ri.bits_per_channel != 16) {
+ +
1261 ri.bits_per_channel = 16;
+
1262 }
+
1263
+
1264 // @TODO: move stbi__convert_format16 to here
+
1265 // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision
+
1266
+ +
1268 int channels = req_comp ? req_comp : *comp;
+ +
1270 }
+
1271
+
1272 return (stbi__uint16 *)result;
+
1273}
+
1274
+
1275#if !defined(STBI_NO_HDR) && !defined(STBI_NO_LINEAR)
+
1276static void stbi__float_postprocess(float * result, int * x, int * y, int * comp, int req_comp) {
+ +
1278 int channels = req_comp ? req_comp : *comp;
+
1279 stbi__vertical_flip(result, *x, *y, channels * sizeof(float));
+
1280 }
+
1281}
+
1282#endif
+
1283
+
1284#ifndef STBI_NO_STDIO
+
1285
+
1286#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8)
+
1287STBI_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char * str,
+
1288 int cbmb, wchar_t * widestr, int cchwide);
+
1289STBI_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags,
+
1290 const wchar_t * widestr, int cchwide, char * str, int cbmb,
+
1291 const char * defchar, int * used_default);
+
1292#endif
+
1293
+
1294#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8)
+
1295STBIDEF int stbi_convert_wchar_to_utf8(char * buffer, size_t bufferlen, const wchar_t * input) {
+
1296 return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer, (int)bufferlen, NULL, NULL);
+
1297}
+
1298#endif
+
1299
+
1300static FILE * stbi__fopen(char const * filename, char const * mode) {
+
1301 FILE * f;
+
1302#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8)
+
1303 wchar_t wMode[64];
+
1304 wchar_t wFilename[1024];
+
1305 if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename, sizeof(wFilename) / sizeof(*wFilename)))
+
1306 return 0;
+
1307
+
1308 if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, sizeof(wMode) / sizeof(*wMode)))
+
1309 return 0;
+
1310
+
1311#if defined(_MSC_VER) && _MSC_VER >= 1400
+
1312 if (0 != _wfopen_s(&f, wFilename, wMode))
+
1313 f = 0;
+
1314#else
+ +
1316#endif
+
1317
+
1318#elif defined(_MSC_VER) && _MSC_VER >= 1400
+
1319 if (0 != fopen_s(&f, filename, mode))
+
1320 f = 0;
+
1321#else
+
1322 f = fopen(filename, mode);
+
1323#endif
+
1324 return f;
+
1325}
+
1326
+
1327STBIDEF stbi_uc * stbi_load(char const * filename, int * x, int * y, int * comp, int req_comp) {
+
1328 FILE * f = stbi__fopen(filename, "rb");
+
1329 unsigned char * result;
+
1330 if (!f)
+
1331 return stbi__errpuc("can't fopen", "Unable to open file");
+
1332 result = stbi_load_from_file(f, x, y, comp, req_comp);
+
1333 fclose(f);
+
1334 return result;
+
1335}
+
1336
+
1337STBIDEF stbi_uc * stbi_load_from_file(FILE * f, int * x, int * y, int * comp, int req_comp) {
+
1338 unsigned char * result;
+ +
1340 stbi__start_file(&s, f);
+ +
1342 if (result) {
+
1343 // need to 'unget' all the characters in the IO buffer
+
1344 fseek(f, -(int)(s.img_buffer_end - s.img_buffer), SEEK_CUR);
+
1345 }
+
1346 return result;
+
1347}
+
1348
+
1349STBIDEF stbi__uint16 * stbi_load_from_file_16(FILE * f, int * x, int * y, int * comp, int req_comp) {
+ + +
1352 stbi__start_file(&s, f);
+ +
1354 if (result) {
+
1355 // need to 'unget' all the characters in the IO buffer
+
1356 fseek(f, -(int)(s.img_buffer_end - s.img_buffer), SEEK_CUR);
+
1357 }
+
1358 return result;
+
1359}
+
1360
+
1361STBIDEF stbi_us * stbi_load_16(char const * filename, int * x, int * y, int * comp, int req_comp) {
+
1362 FILE * f = stbi__fopen(filename, "rb");
+ +
1364 if (!f)
+
1365 return (stbi_us *)stbi__errpuc("can't fopen", "Unable to open file");
+
1366 result = stbi_load_from_file_16(f, x, y, comp, req_comp);
+
1367 fclose(f);
+
1368 return result;
+
1369}
+
1370
+
1371#endif
+
1372
+
1373STBIDEF stbi_us * stbi_load_16_from_memory(stbi_uc const * buffer, int len, int * x, int * y, int * channels_in_file,
+
1374 int desired_channels) {
+ + + +
1378}
+
1379
+
1380STBIDEF stbi_us * stbi_load_16_from_callbacks(stbi_io_callbacks const * clbk, void * user, int * x, int * y,
+ + + + +
1385}
+
1386
+
1387STBIDEF stbi_uc * stbi_load_from_memory(stbi_uc const * buffer, int len, int * x, int * y, int * comp, int req_comp) {
+ + + +
1391}
+
1392
+
1393STBIDEF stbi_uc * stbi_load_from_callbacks(stbi_io_callbacks const * clbk, void * user, int * x, int * y, int * comp,
+
1394 int req_comp) {
+ + + +
1398}
+
1399
+
1400#ifndef STBI_NO_GIF
+
1401STBIDEF stbi_uc * stbi_load_gif_from_memory(stbi_uc const * buffer, int len, int ** delays, int * x, int * y, int * z,
+
1402 int * comp, int req_comp) {
+
1403 unsigned char * result;
+ + +
1406
+
1407 result = (unsigned char *)stbi__load_gif_main(&s, delays, x, y, z, comp, req_comp);
+ + +
1410 }
+
1411
+
1412 return result;
+
1413}
+
1414#endif
+
1415
+
1416#ifndef STBI_NO_LINEAR
+
1417static float * stbi__loadf_main(stbi__context * s, int * x, int * y, int * comp, int req_comp) {
+
1418 unsigned char * data;
+
1419#ifndef STBI_NO_HDR
+
1420 if (stbi__hdr_test(s)) {
+ +
1422 float * hdr_data = stbi__hdr_load(s, x, y, comp, req_comp, &ri);
+
1423 if (hdr_data)
+ +
1425 return hdr_data;
+
1426 }
+
1427#endif
+ +
1429 if (data)
+
1430 return stbi__ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp);
+
1431 return stbi__errpf("unknown image type", "Image not of any known type, or corrupt");
+
1432}
+
1433
+
1434STBIDEF float * stbi_loadf_from_memory(stbi_uc const * buffer, int len, int * x, int * y, int * comp, int req_comp) {
+ + +
1437 return stbi__loadf_main(&s, x, y, comp, req_comp);
+
1438}
+
1439
+
1440STBIDEF float * stbi_loadf_from_callbacks(stbi_io_callbacks const * clbk, void * user, int * x, int * y, int * comp,
+
1441 int req_comp) {
+ + +
1444 return stbi__loadf_main(&s, x, y, comp, req_comp);
+
1445}
+
1446
+
1447#ifndef STBI_NO_STDIO
+
1448STBIDEF float * stbi_loadf(char const * filename, int * x, int * y, int * comp, int req_comp) {
+
1449 float * result;
+
1450 FILE * f = stbi__fopen(filename, "rb");
+
1451 if (!f)
+
1452 return stbi__errpf("can't fopen", "Unable to open file");
+
1453 result = stbi_loadf_from_file(f, x, y, comp, req_comp);
+
1454 fclose(f);
+
1455 return result;
+
1456}
+
1457
+
1458STBIDEF float * stbi_loadf_from_file(FILE * f, int * x, int * y, int * comp, int req_comp) {
+ +
1460 stbi__start_file(&s, f);
+
1461 return stbi__loadf_main(&s, x, y, comp, req_comp);
+
1462}
+
1463#endif // !STBI_NO_STDIO
+
1464
+
1465#endif // !STBI_NO_LINEAR
+
1466
+
1467// these is-hdr-or-not is defined independent of whether STBI_NO_LINEAR is
+
1468// defined, for API simplicity; if STBI_NO_LINEAR is defined, it always
+
1469// reports false!
+
1470
+
1471STBIDEF int stbi_is_hdr_from_memory(stbi_uc const * buffer, int len) {
+
1472#ifndef STBI_NO_HDR
+ + +
1475 return stbi__hdr_test(&s);
+
1476#else
+ + +
1479 return 0;
+
1480#endif
+
1481}
+
1482
+
1483#ifndef STBI_NO_STDIO
+
1484STBIDEF int stbi_is_hdr(char const * filename) {
+
1485 FILE * f = stbi__fopen(filename, "rb");
+
1486 int result = 0;
+
1487 if (f) {
+
1488 result = stbi_is_hdr_from_file(f);
+
1489 fclose(f);
+
1490 }
+
1491 return result;
+
1492}
+
1493
+
1494STBIDEF int stbi_is_hdr_from_file(FILE * f) {
+
1495#ifndef STBI_NO_HDR
+
1496 long pos = ftell(f);
+
1497 int res;
+ +
1499 stbi__start_file(&s, f);
+
1500 res = stbi__hdr_test(&s);
+
1501 fseek(f, pos, SEEK_SET);
+
1502 return res;
+
1503#else
+
1504 STBI_NOTUSED(f);
+
1505 return 0;
+
1506#endif
+
1507}
+
1508#endif // !STBI_NO_STDIO
+
1509
+
1510STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const * clbk, void * user) {
+
1511#ifndef STBI_NO_HDR
+ + +
1514 return stbi__hdr_test(&s);
+
1515#else
+ + +
1518 return 0;
+
1519#endif
+
1520}
+
1521
+
1522#ifndef STBI_NO_LINEAR
+
1523static float stbi__l2h_gamma = 2.2f, stbi__l2h_scale = 1.0f;
+
1524
+
1525STBIDEF void stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; }
+
1526STBIDEF void stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; }
+
1527#endif
+
1528
+
1529static float stbi__h2l_gamma_i = 1.0f / 2.2f, stbi__h2l_scale_i = 1.0f;
+
1530
+
1531STBIDEF void stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1 / gamma; }
+
1532STBIDEF void stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1 / scale; }
+
1533
+
1535//
+
1536// Common code used by all image loaders
+
1537//
+
1538
+ +
1540
+
1541static void stbi__refill_buffer(stbi__context * s) {
+
1542 int n = (s->io.read)(s->io_user_data, (char *)s->buffer_start, s->buflen);
+
1543 s->callback_already_read += (int)(s->img_buffer - s->img_buffer_original);
+
1544 if (n == 0) {
+
1545 // at end of file, treat same as if from memory, but need to handle case
+
1546 // where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file
+
1547 s->read_from_callbacks = 0;
+
1548 s->img_buffer = s->buffer_start;
+
1549 s->img_buffer_end = s->buffer_start + 1;
+
1550 *s->img_buffer = 0;
+
1551 } else {
+
1552 s->img_buffer = s->buffer_start;
+
1553 s->img_buffer_end = s->buffer_start + n;
+
1554 }
+
1555}
+
1556
+ +
1558 if (s->img_buffer < s->img_buffer_end)
+
1559 return *s->img_buffer++;
+
1560 if (s->read_from_callbacks) {
+ +
1562 return *s->img_buffer++;
+
1563 }
+
1564 return 0;
+
1565}
+
1566
+
1567#if defined(STBI_NO_JPEG) && defined(STBI_NO_HDR) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM)
+
1568// nothing
+
1569#else
+ +
1571 if (s->io.read) {
+
1572 if (!(s->io.eof)(s->io_user_data))
+
1573 return 0;
+
1574 // if feof() is true, check if buffer = end
+
1575 // special case: we've only got the special 0 character at the end
+
1576 if (s->read_from_callbacks == 0)
+
1577 return 1;
+
1578 }
+
1579
+
1580 return s->img_buffer >= s->img_buffer_end;
+
1581}
+
1582#endif
+
1583
+
1584#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && \
+
1585 defined(STBI_NO_GIF) && defined(STBI_NO_PIC)
+
1586// nothing
+
1587#else
+
1588static void stbi__skip(stbi__context * s, int n) {
+
1589 if (n == 0)
+
1590 return; // already there!
+
1591 if (n < 0) {
+
1592 s->img_buffer = s->img_buffer_end;
+
1593 return;
+
1594 }
+
1595 if (s->io.read) {
+
1596 int blen = (int)(s->img_buffer_end - s->img_buffer);
+
1597 if (blen < n) {
+
1598 s->img_buffer = s->img_buffer_end;
+
1599 (s->io.skip)(s->io_user_data, n - blen);
+
1600 return;
+
1601 }
+
1602 }
+
1603 s->img_buffer += n;
+
1604}
+
1605#endif
+
1606
+
1607#if defined(STBI_NO_PNG) && defined(STBI_NO_TGA) && defined(STBI_NO_HDR) && defined(STBI_NO_PNM)
+
1608// nothing
+
1609#else
+
1610static int stbi__getn(stbi__context * s, stbi_uc * buffer, int n) {
+
1611 if (s->io.read) {
+
1612 int blen = (int)(s->img_buffer_end - s->img_buffer);
+
1613 if (blen < n) {
+
1614 int res, count;
+
1615
+
1616 memcpy(buffer, s->img_buffer, blen);
+
1617
+
1618 count = (s->io.read)(s->io_user_data, (char *)buffer + blen, n - blen);
+
1619 res = (count == (n - blen));
+
1620 s->img_buffer = s->img_buffer_end;
+
1621 return res;
+
1622 }
+
1623 }
+
1624
+
1625 if (s->img_buffer + n <= s->img_buffer_end) {
+
1626 memcpy(buffer, s->img_buffer, n);
+
1627 s->img_buffer += n;
+
1628 return 1;
+
1629 } else
+
1630 return 0;
+
1631}
+
1632#endif
+
1633
+
1634#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC)
+
1635// nothing
+
1636#else
+
1637static int stbi__get16be(stbi__context * s) {
+
1638 int z = stbi__get8(s);
+
1639 return (z << 8) + stbi__get8(s);
+
1640}
+
1641#endif
+
1642
+
1643#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC)
+
1644// nothing
+
1645#else
+ + +
1648 return (z << 16) + stbi__get16be(s);
+
1649}
+
1650#endif
+
1651
+
1652#if defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF)
+
1653// nothing
+
1654#else
+
1655static int stbi__get16le(stbi__context * s) {
+
1656 int z = stbi__get8(s);
+
1657 return z + (stbi__get8(s) << 8);
+
1658}
+
1659#endif
+
1660
+
1661#ifndef STBI_NO_BMP
+ + +
1664 z += (stbi__uint32)stbi__get16le(s) << 16;
+
1665 return z;
+
1666}
+
1667#endif
+
1668
+
1669#define STBI__BYTECAST(x) ((stbi_uc)((x)&255)) // truncate int to byte without warnings
+
1670
+
1671#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && \
+
1672 defined(STBI_NO_GIF) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM)
+
1673// nothing
+
1674#else
+
1676//
+
1677// generic converter from built-in img_n to req_comp
+
1678// individual types do this automatically as much as possible (e.g. jpeg
+
1679// does all cases internally since it needs to colorspace convert anyway,
+
1680// and it never has alpha, so very few cases ). png can automatically
+
1681// interleave an alpha=255 channel, but falls back to this for other cases
+
1682//
+
1683// assume data buffer is malloced, so malloc a new one and free that one
+
1684// only failure mode is malloc failing
+
1685
+
1686static stbi_uc stbi__compute_y(int r, int g, int b) { return (stbi_uc)(((r * 77) + (g * 150) + (29 * b)) >> 8); }
+
1687#endif
+
1688
+
1689#if defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && \
+
1690 defined(STBI_NO_PIC) && defined(STBI_NO_PNM)
+
1691// nothing
+
1692#else
+
1693static unsigned char * stbi__convert_format(unsigned char * data, int img_n, int req_comp, unsigned int x, unsigned int y) {
+
1694 int i, j;
+
1695 unsigned char * good;
+
1696
+
1697 if (req_comp == img_n)
+
1698 return data;
+
1699 STBI_ASSERT(req_comp >= 1 && req_comp <= 4);
+
1700
+
1701 good = (unsigned char *)stbi__malloc_mad3(req_comp, x, y, 0);
+
1702 if (good == NULL) {
+
1703 STBI_FREE(data);
+
1704 return stbi__errpuc("outofmem", "Out of memory");
+
1705 }
+
1706
+
1707 for (j = 0; j < (int)y; ++j) {
+
1708 unsigned char * src = data + j * x * img_n;
+
1709 unsigned char * dest = good + j * x * req_comp;
+
1710
+
1711#define STBI__COMBO(a, b) ((a)*8 + (b))
+
1712#define STBI__CASE(a, b) \
+
1713 case STBI__COMBO(a, b): \
+
1714 for (i = x - 1; i >= 0; --i, src += a, dest += b)
+
1715 // convert source image with img_n components to one with req_comp components;
+
1716 // avoid switch per pixel, so use switch per scanline and massive macros
+
1717 switch (STBI__COMBO(img_n, req_comp)) {
+
1718 STBI__CASE(1, 2) {
+
1719 dest[0] = src[0];
+
1720 dest[1] = 255;
+
1721 }
+
1722 break;
+
1723 STBI__CASE(1, 3) { dest[0] = dest[1] = dest[2] = src[0]; }
+
1724 break;
+
1725 STBI__CASE(1, 4) {
+
1726 dest[0] = dest[1] = dest[2] = src[0];
+
1727 dest[3] = 255;
+
1728 }
+
1729 break;
+
1730 STBI__CASE(2, 1) { dest[0] = src[0]; }
+
1731 break;
+
1732 STBI__CASE(2, 3) { dest[0] = dest[1] = dest[2] = src[0]; }
+
1733 break;
+
1734 STBI__CASE(2, 4) {
+
1735 dest[0] = dest[1] = dest[2] = src[0];
+
1736 dest[3] = src[1];
+
1737 }
+
1738 break;
+
1739 STBI__CASE(3, 4) {
+
1740 dest[0] = src[0];
+
1741 dest[1] = src[1];
+
1742 dest[2] = src[2];
+
1743 dest[3] = 255;
+
1744 }
+
1745 break;
+
1746 STBI__CASE(3, 1) { dest[0] = stbi__compute_y(src[0], src[1], src[2]); }
+
1747 break;
+
1748 STBI__CASE(3, 2) {
+
1749 dest[0] = stbi__compute_y(src[0], src[1], src[2]);
+
1750 dest[1] = 255;
+
1751 }
+
1752 break;
+
1753 STBI__CASE(4, 1) { dest[0] = stbi__compute_y(src[0], src[1], src[2]); }
+
1754 break;
+
1755 STBI__CASE(4, 2) {
+
1756 dest[0] = stbi__compute_y(src[0], src[1], src[2]);
+
1757 dest[1] = src[3];
+
1758 }
+
1759 break;
+
1760 STBI__CASE(4, 3) {
+
1761 dest[0] = src[0];
+
1762 dest[1] = src[1];
+
1763 dest[2] = src[2];
+
1764 }
+
1765 break;
+
1766 default:
+
1767 STBI_ASSERT(0);
+
1768 STBI_FREE(data);
+
1769 STBI_FREE(good);
+
1770 return stbi__errpuc("unsupported", "Unsupported format conversion");
+
1771 }
+
1772#undef STBI__CASE
+
1773 }
+
1774
+
1775 STBI_FREE(data);
+
1776 return good;
+
1777}
+
1778#endif
+
1779
+
1780#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD)
+
1781// nothing
+
1782#else
+
1783static stbi__uint16 stbi__compute_y_16(int r, int g, int b) { return (stbi__uint16)(((r * 77) + (g * 150) + (29 * b)) >> 8); }
+
1784#endif
+
1785
+
1786#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD)
+
1787// nothing
+
1788#else
+
1789static stbi__uint16 * stbi__convert_format16(stbi__uint16 * data, int img_n, int req_comp, unsigned int x, unsigned int y) {
+
1790 int i, j;
+ +
1792
+
1793 if (req_comp == img_n)
+
1794 return data;
+
1795 STBI_ASSERT(req_comp >= 1 && req_comp <= 4);
+
1796
+
1797 good = (stbi__uint16 *)stbi__malloc(req_comp * x * y * 2);
+
1798 if (good == NULL) {
+
1799 STBI_FREE(data);
+
1800 return (stbi__uint16 *)stbi__errpuc("outofmem", "Out of memory");
+
1801 }
+
1802
+
1803 for (j = 0; j < (int)y; ++j) {
+
1804 stbi__uint16 * src = data + j * x * img_n;
+
1805 stbi__uint16 * dest = good + j * x * req_comp;
+
1806
+
1807#define STBI__COMBO(a, b) ((a)*8 + (b))
+
1808#define STBI__CASE(a, b) \
+
1809 case STBI__COMBO(a, b): \
+
1810 for (i = x - 1; i >= 0; --i, src += a, dest += b)
+
1811 // convert source image with img_n components to one with req_comp components;
+
1812 // avoid switch per pixel, so use switch per scanline and massive macros
+
1813 switch (STBI__COMBO(img_n, req_comp)) {
+
1814 STBI__CASE(1, 2) {
+
1815 dest[0] = src[0];
+
1816 dest[1] = 0xffff;
+
1817 }
+
1818 break;
+
1819 STBI__CASE(1, 3) { dest[0] = dest[1] = dest[2] = src[0]; }
+
1820 break;
+
1821 STBI__CASE(1, 4) {
+
1822 dest[0] = dest[1] = dest[2] = src[0];
+
1823 dest[3] = 0xffff;
+
1824 }
+
1825 break;
+
1826 STBI__CASE(2, 1) { dest[0] = src[0]; }
+
1827 break;
+
1828 STBI__CASE(2, 3) { dest[0] = dest[1] = dest[2] = src[0]; }
+
1829 break;
+
1830 STBI__CASE(2, 4) {
+
1831 dest[0] = dest[1] = dest[2] = src[0];
+
1832 dest[3] = src[1];
+
1833 }
+
1834 break;
+
1835 STBI__CASE(3, 4) {
+
1836 dest[0] = src[0];
+
1837 dest[1] = src[1];
+
1838 dest[2] = src[2];
+
1839 dest[3] = 0xffff;
+
1840 }
+
1841 break;
+
1842 STBI__CASE(3, 1) { dest[0] = stbi__compute_y_16(src[0], src[1], src[2]); }
+
1843 break;
+
1844 STBI__CASE(3, 2) {
+
1845 dest[0] = stbi__compute_y_16(src[0], src[1], src[2]);
+
1846 dest[1] = 0xffff;
+
1847 }
+
1848 break;
+
1849 STBI__CASE(4, 1) { dest[0] = stbi__compute_y_16(src[0], src[1], src[2]); }
+
1850 break;
+
1851 STBI__CASE(4, 2) {
+
1852 dest[0] = stbi__compute_y_16(src[0], src[1], src[2]);
+
1853 dest[1] = src[3];
+
1854 }
+
1855 break;
+
1856 STBI__CASE(4, 3) {
+
1857 dest[0] = src[0];
+
1858 dest[1] = src[1];
+
1859 dest[2] = src[2];
+
1860 }
+
1861 break;
+
1862 default:
+
1863 STBI_ASSERT(0);
+
1864 STBI_FREE(data);
+
1865 STBI_FREE(good);
+
1866 return (stbi__uint16 *)stbi__errpuc("unsupported", "Unsupported format conversion");
+
1867 }
+
1868#undef STBI__CASE
+
1869 }
+
1870
+
1871 STBI_FREE(data);
+
1872 return good;
+
1873}
+
1874#endif
+
1875
+
1876#ifndef STBI_NO_LINEAR
+
1877static float * stbi__ldr_to_hdr(stbi_uc * data, int x, int y, int comp) {
+
1878 int i, k, n;
+
1879 float * output;
+
1880 if (!data)
+
1881 return NULL;
+
1882 output = (float *)stbi__malloc_mad4(x, y, comp, sizeof(float), 0);
+
1883 if (output == NULL) {
+
1884 STBI_FREE(data);
+
1885 return stbi__errpf("outofmem", "Out of memory");
+
1886 }
+
1887 // compute number of non-alpha components
+
1888 if (comp & 1)
+
1889 n = comp;
+
1890 else
+
1891 n = comp - 1;
+
1892 for (i = 0; i < x * y; ++i) {
+
1893 for (k = 0; k < n; ++k) {
+
1894 output[i * comp + k] = (float)(pow(data[i * comp + k] / 255.0f, stbi__l2h_gamma) * stbi__l2h_scale);
+
1895 }
+
1896 }
+
1897 if (n < comp) {
+
1898 for (i = 0; i < x * y; ++i) {
+
1899 output[i * comp + n] = data[i * comp + n] / 255.0f;
+
1900 }
+
1901 }
+
1902 STBI_FREE(data);
+
1903 return output;
+
1904}
+
1905#endif
+
1906
+
1907#ifndef STBI_NO_HDR
+
1908#define stbi__float2int(x) ((int)(x))
+
1909static stbi_uc * stbi__hdr_to_ldr(float * data, int x, int y, int comp) {
+
1910 int i, k, n;
+
1911 stbi_uc * output;
+
1912 if (!data)
+
1913 return NULL;
+
1914 output = (stbi_uc *)stbi__malloc_mad3(x, y, comp, 0);
+
1915 if (output == NULL) {
+
1916 STBI_FREE(data);
+
1917 return stbi__errpuc("outofmem", "Out of memory");
+
1918 }
+
1919 // compute number of non-alpha components
+
1920 if (comp & 1)
+
1921 n = comp;
+
1922 else
+
1923 n = comp - 1;
+
1924 for (i = 0; i < x * y; ++i) {
+
1925 for (k = 0; k < n; ++k) {
+
1926 float z = (float)pow(data[i * comp + k] * stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f;
+
1927 if (z < 0)
+
1928 z = 0;
+
1929 if (z > 255)
+
1930 z = 255;
+
1931 output[i * comp + k] = (stbi_uc)stbi__float2int(z);
+
1932 }
+
1933 if (k < comp) {
+
1934 float z = data[i * comp + k] * 255 + 0.5f;
+
1935 if (z < 0)
+
1936 z = 0;
+
1937 if (z > 255)
+
1938 z = 255;
+
1939 output[i * comp + k] = (stbi_uc)stbi__float2int(z);
+
1940 }
+
1941 }
+
1942 STBI_FREE(data);
+
1943 return output;
+
1944}
+
1945#endif
+
1946
+
1948//
+
1949// "baseline" JPEG/JFIF decoder
+
1950//
+
1951// simple implementation
+
1952// - doesn't support delayed output of y-dimension
+
1953// - simple interface (only one output format: 8-bit interleaved RGB)
+
1954// - doesn't try to recover corrupt jpegs
+
1955// - doesn't allow partial loading, loading multiple at once
+
1956// - still fast on x86 (copying globals into locals doesn't help x86)
+
1957// - allocates lots of intermediate memory (full size of all components)
+
1958// - non-interleaved case requires this anyway
+
1959// - allows good upsampling (see next)
+
1960// high-quality
+
1961// - upsampled channels are bilinearly interpolated, even across blocks
+
1962// - quality integer IDCT derived from IJG's 'slow'
+
1963// performance
+
1964// - fast huffman; reasonable integer IDCT
+
1965// - some SIMD kernels for common paths on targets with SSE2/NEON
+
1966// - uses a lot of intermediate memory, could cache poorly
+
1967
+
1968#ifndef STBI_NO_JPEG
+
1969
+
1970// huffman decoding acceleration
+
1971#define FAST_BITS 9 // larger handles more cases; smaller stomps less cache
+
1972
+
1973typedef struct {
+
1974 stbi_uc fast[1 << FAST_BITS];
+
1975 // weirdly, repacking this into AoS is a 10% speed loss, instead of a win
+
1976 stbi__uint16 code[256];
+
1977 stbi_uc values[256];
+
1978 stbi_uc size[257];
+
1979 unsigned int maxcode[18];
+
1980 int delta[17]; // old 'firstsymbol' - old 'firstcode'
+ +
1982
+
1983typedef struct {
+
1984 stbi__context * s;
+ + +
1987 stbi__uint16 dequant[4][64];
+
1988 stbi__int16 fast_ac[4][1 << FAST_BITS];
+
1989
+
1990 // sizes for components, interleaved MCUs
+
1991 int img_h_max, img_v_max;
+
1992 int img_mcu_x, img_mcu_y;
+
1993 int img_mcu_w, img_mcu_h;
+
1994
+
1995 // definition of jpeg image component
+
1996 struct {
+
1997 int id;
+
1998 int h, v;
+
1999 int tq;
+
2000 int hd, ha;
+
2001 int dc_pred;
+
2002
+
2003 int x, y, w2, h2;
+
2004 stbi_uc * data;
+
2005 void *raw_data, *raw_coeff;
+
2006 stbi_uc * linebuf;
+
2007 short * coeff; // progressive only
+
2008 int coeff_w, coeff_h; // number of 8x8 coefficient blocks
+
2009 } img_comp[4];
+
2010
+
2011 stbi__uint32 code_buffer; // jpeg entropy-coded buffer
+
2012 int code_bits; // number of valid bits
+
2013 unsigned char marker; // marker seen while filling entropy buffer
+
2014 int nomore; // flag if we saw a marker so must stop
+
2015
+
2016 int progressive;
+
2017 int spec_start;
+
2018 int spec_end;
+
2019 int succ_high;
+
2020 int succ_low;
+
2021 int eob_run;
+
2022 int jfif;
+
2023 int app14_color_transform; // Adobe APP14 tag
+
2024 int rgb;
+
2025
+
2026 int scan_n, order[4];
+ +
2028
+
2029 // kernels
+
2030 void (*idct_block_kernel)(stbi_uc * out, int out_stride, short data[64]);
+
2031 void (*YCbCr_to_RGB_kernel)(stbi_uc * out, const stbi_uc * y, const stbi_uc * pcb, const stbi_uc * pcr, int count,
+
2032 int step);
+
2033 stbi_uc * (*resample_row_hv_2_kernel)(stbi_uc * out, stbi_uc * in_near, stbi_uc * in_far, int w, int hs);
+
2034} stbi__jpeg;
+
2035
+
2036static int stbi__build_huffman(stbi__huffman * h, int * count) {
+
2037 int i, j, k = 0;
+
2038 unsigned int code;
+
2039 // build size list for each symbol (from JPEG spec)
+
2040 for (i = 0; i < 16; ++i) {
+
2041 for (j = 0; j < count[i]; ++j) {
+
2042 h->size[k++] = (stbi_uc)(i + 1);
+
2043 if (k >= 257)
+
2044 return stbi__err("bad size list", "Corrupt JPEG");
+
2045 }
+
2046 }
+
2047 h->size[k] = 0;
+
2048
+
2049 // compute actual symbols (from jpeg spec)
+
2050 code = 0;
+
2051 k = 0;
+
2052 for (j = 1; j <= 16; ++j) {
+
2053 // compute delta to add to code to compute symbol id
+
2054 h->delta[j] = k - code;
+
2055 if (h->size[k] == j) {
+
2056 while (h->size[k] == j)
+
2057 h->code[k++] = (stbi__uint16)(code++);
+
2058 if (code - 1 >= (1u << j))
+
2059 return stbi__err("bad code lengths", "Corrupt JPEG");
+
2060 }
+
2061 // compute largest code + 1 for this size, preshifted as needed later
+
2062 h->maxcode[j] = code << (16 - j);
+
2063 code <<= 1;
+
2064 }
+
2065 h->maxcode[j] = 0xffffffff;
+
2066
+
2067 // build non-spec acceleration table; 255 is flag for not-accelerated
+
2068 memset(h->fast, 255, 1 << FAST_BITS);
+
2069 for (i = 0; i < k; ++i) {
+
2070 int s = h->size[i];
+
2071 if (s <= FAST_BITS) {
+
2072 int c = h->code[i] << (FAST_BITS - s);
+
2073 int m = 1 << (FAST_BITS - s);
+
2074 for (j = 0; j < m; ++j) {
+
2075 h->fast[c + j] = (stbi_uc)i;
+
2076 }
+
2077 }
+
2078 }
+
2079 return 1;
+
2080}
+
2081
+
2082// build a table that decodes both magnitude and value of small ACs in
+
2083// one go.
+ +
2085 int i;
+
2086 for (i = 0; i < (1 << FAST_BITS); ++i) {
+
2087 stbi_uc fast = h->fast[i];
+
2088 fast_ac[i] = 0;
+
2089 if (fast < 255) {
+
2090 int rs = h->values[fast];
+
2091 int run = (rs >> 4) & 15;
+
2092 int magbits = rs & 15;
+
2093 int len = h->size[fast];
+
2094
+
2095 if (magbits && len + magbits <= FAST_BITS) {
+
2096 // magnitude code followed by receive_extend code
+
2097 int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits);
+
2098 int m = 1 << (magbits - 1);
+
2099 if (k < m)
+
2100 k += (~0U << magbits) + 1;
+
2101 // if the result is small enough, we can fit it in fast_ac table
+
2102 if (k >= -128 && k <= 127)
+
2103 fast_ac[i] = (stbi__int16)((k * 256) + (run * 16) + (len + magbits));
+
2104 }
+
2105 }
+
2106 }
+
2107}
+
2108
+
2109static void stbi__grow_buffer_unsafe(stbi__jpeg * j) {
+
2110 do {
+
2111 unsigned int b = j->nomore ? 0 : stbi__get8(j->s);
+
2112 if (b == 0xff) {
+
2113 int c = stbi__get8(j->s);
+
2114 while (c == 0xff)
+
2115 c = stbi__get8(j->s); // consume fill bytes
+
2116 if (c != 0) {
+
2117 j->marker = (unsigned char)c;
+
2118 j->nomore = 1;
+
2119 return;
+
2120 }
+
2121 }
+
2122 j->code_buffer |= b << (24 - j->code_bits);
+
2123 j->code_bits += 8;
+
2124 } while (j->code_bits <= 24);
+
2125}
+
2126
+
2127// (1 << n) - 1
+
2128static const stbi__uint32 stbi__bmask[17] = {0, 1, 3, 7, 15, 31, 63, 127, 255,
+
2129 511, 1023, 2047, 4095, 8191, 16383, 32767, 65535};
+
2130
+
2131// decode a jpeg huffman value from the bitstream
+ +
2133 unsigned int temp;
+
2134 int c, k;
+
2135
+
2136 if (j->code_bits < 16)
+ +
2138
+
2139 // look at the top FAST_BITS and determine what symbol ID it is,
+
2140 // if the code is <= FAST_BITS
+
2141 c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS) - 1);
+
2142 k = h->fast[c];
+
2143 if (k < 255) {
+
2144 int s = h->size[k];
+
2145 if (s > j->code_bits)
+
2146 return -1;
+
2147 j->code_buffer <<= s;
+
2148 j->code_bits -= s;
+
2149 return h->values[k];
+
2150 }
+
2151
+
2152 // naive test is to shift the code_buffer down so k bits are
+
2153 // valid, then test against maxcode. To speed this up, we've
+
2154 // preshifted maxcode left so that it has (16-k) 0s at the
+
2155 // end; in other words, regardless of the number of bits, it
+
2156 // wants to be compared against something shifted to have 16;
+
2157 // that way we don't need to shift inside the loop.
+
2158 temp = j->code_buffer >> 16;
+
2159 for (k = FAST_BITS + 1;; ++k)
+
2160 if (temp < h->maxcode[k])
+
2161 break;
+
2162 if (k == 17) {
+
2163 // error! code not found
+
2164 j->code_bits -= 16;
+
2165 return -1;
+
2166 }
+
2167
+
2168 if (k > j->code_bits)
+
2169 return -1;
+
2170
+
2171 // convert the huffman code to the symbol id
+
2172 c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k];
+
2173 if (c < 0 || c >= 256) // symbol id out of bounds!
+
2174 return -1;
+
2175 STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]);
+
2176
+
2177 // convert the id to a symbol
+
2178 j->code_bits -= k;
+
2179 j->code_buffer <<= k;
+
2180 return h->values[c];
+
2181}
+
2182
+
2183// bias[n] = (-1<<n) + 1
+
2184static const int stbi__jbias[16] = {0, -1, -3, -7, -15, -31, -63, -127, -255, -511, -1023, -2047, -4095, -8191, -16383, -32767};
+
2185
+
2186// combined JPEG 'receive' and JPEG 'extend', since baseline
+
2187// always extends everything it receives.
+
2188stbi_inline static int stbi__extend_receive(stbi__jpeg * j, int n) {
+
2189 unsigned int k;
+
2190 int sgn;
+
2191 if (j->code_bits < n)
+ +
2193 if (j->code_bits < n)
+
2194 return 0; // ran out of bits from stream, return 0s intead of continuing
+
2195
+
2196 sgn = j->code_buffer >> 31; // sign bit always in MSB; 0 if MSB clear (positive), 1 if MSB set (negative)
+
2197 k = stbi_lrot(j->code_buffer, n);
+
2198 j->code_buffer = k & ~stbi__bmask[n];
+
2199 k &= stbi__bmask[n];
+
2200 j->code_bits -= n;
+
2201 return k + (stbi__jbias[n] & (sgn - 1));
+
2202}
+
2203
+
2204// get some unsigned bits
+
2205stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg * j, int n) {
+
2206 unsigned int k;
+
2207 if (j->code_bits < n)
+ +
2209 if (j->code_bits < n)
+
2210 return 0; // ran out of bits from stream, return 0s intead of continuing
+
2211 k = stbi_lrot(j->code_buffer, n);
+
2212 j->code_buffer = k & ~stbi__bmask[n];
+
2213 k &= stbi__bmask[n];
+
2214 j->code_bits -= n;
+
2215 return k;
+
2216}
+
2217
+ +
2219 unsigned int k;
+
2220 if (j->code_bits < 1)
+ +
2222 if (j->code_bits < 1)
+
2223 return 0; // ran out of bits from stream, return 0s intead of continuing
+
2224 k = j->code_buffer;
+
2225 j->code_buffer <<= 1;
+
2226 --j->code_bits;
+
2227 return k & 0x80000000;
+
2228}
+
2229
+
2230// given a value that's at position X in the zigzag stream,
+
2231// where does it appear in the 8x8 matrix coded as row-major?
+
2232static const stbi_uc stbi__jpeg_dezigzag[64 + 15] = {
+
2233 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, 35,
+
2234 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63,
+
2235 // let corrupt input sample past end
+
2236 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63};
+
2237
+
2238// decode one 64-entry block--
+ +
2240 int b, stbi__uint16 * dequant) {
+
2241 int diff, dc, k;
+
2242 int t;
+
2243
+
2244 if (j->code_bits < 16)
+ + +
2247 if (t < 0 || t > 15)
+
2248 return stbi__err("bad huffman code", "Corrupt JPEG");
+
2249
+
2250 // 0 all the ac values now so we can do it 32-bits at a time
+
2251 memset(data, 0, 64 * sizeof(data[0]));
+
2252
+
2253 diff = t ? stbi__extend_receive(j, t) : 0;
+
2254 if (!stbi__addints_valid(j->img_comp[b].dc_pred, diff))
+
2255 return stbi__err("bad delta", "Corrupt JPEG");
+
2256 dc = j->img_comp[b].dc_pred + diff;
+
2257 j->img_comp[b].dc_pred = dc;
+ +
2259 return stbi__err("can't merge dc and ac", "Corrupt JPEG");
+
2260 data[0] = (short)(dc * dequant[0]);
+
2261
+
2262 // decode AC components, see JPEG spec
+
2263 k = 1;
+
2264 do {
+
2265 unsigned int zig;
+
2266 int c, r, s;
+
2267 if (j->code_bits < 16)
+ +
2269 c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS) - 1);
+
2270 r = fac[c];
+
2271 if (r) { // fast-AC path
+
2272 k += (r >> 4) & 15; // run
+
2273 s = r & 15; // combined length
+
2274 if (s > j->code_bits)
+
2275 return stbi__err("bad huffman code", "Combined length longer than code bits available");
+
2276 j->code_buffer <<= s;
+
2277 j->code_bits -= s;
+
2278 // decode into unzigzag'd location
+
2279 zig = stbi__jpeg_dezigzag[k++];
+
2280 data[zig] = (short)((r >> 8) * dequant[zig]);
+
2281 } else {
+ +
2283 if (rs < 0)
+
2284 return stbi__err("bad huffman code", "Corrupt JPEG");
+
2285 s = rs & 15;
+
2286 r = rs >> 4;
+
2287 if (s == 0) {
+
2288 if (rs != 0xf0)
+
2289 break; // end block
+
2290 k += 16;
+
2291 } else {
+
2292 k += r;
+
2293 // decode into unzigzag'd location
+
2294 zig = stbi__jpeg_dezigzag[k++];
+
2295 data[zig] = (short)(stbi__extend_receive(j, s) * dequant[zig]);
+
2296 }
+
2297 }
+
2298 } while (k < 64);
+
2299 return 1;
+
2300}
+
2301
+
2302static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg * j, short data[64], stbi__huffman * hdc, int b) {
+
2303 int diff, dc;
+
2304 int t;
+
2305 if (j->spec_end != 0)
+
2306 return stbi__err("can't merge dc and ac", "Corrupt JPEG");
+
2307
+
2308 if (j->code_bits < 16)
+ +
2310
+
2311 if (j->succ_high == 0) {
+
2312 // first scan for DC coefficient, must be first
+
2313 memset(data, 0, 64 * sizeof(data[0])); // 0 all the ac values now
+ +
2315 if (t < 0 || t > 15)
+
2316 return stbi__err("can't merge dc and ac", "Corrupt JPEG");
+
2317 diff = t ? stbi__extend_receive(j, t) : 0;
+
2318
+
2319 if (!stbi__addints_valid(j->img_comp[b].dc_pred, diff))
+
2320 return stbi__err("bad delta", "Corrupt JPEG");
+
2321 dc = j->img_comp[b].dc_pred + diff;
+
2322 j->img_comp[b].dc_pred = dc;
+
2323 if (!stbi__mul2shorts_valid(dc, 1 << j->succ_low))
+
2324 return stbi__err("can't merge dc and ac", "Corrupt JPEG");
+
2325 data[0] = (short)(dc * (1 << j->succ_low));
+
2326 } else {
+
2327 // refinement scan for DC coefficient
+
2328 if (stbi__jpeg_get_bit(j))
+
2329 data[0] += (short)(1 << j->succ_low);
+
2330 }
+
2331 return 1;
+
2332}
+
2333
+
2334// @OPTIMIZE: store non-zigzagged during the decode passes,
+
2335// and only de-zigzag when dequantizing
+
2336static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg * j, short data[64], stbi__huffman * hac, stbi__int16 * fac) {
+
2337 int k;
+
2338 if (j->spec_start == 0)
+
2339 return stbi__err("can't merge dc and ac", "Corrupt JPEG");
+
2340
+
2341 if (j->succ_high == 0) {
+
2342 int shift = j->succ_low;
+
2343
+
2344 if (j->eob_run) {
+
2345 --j->eob_run;
+
2346 return 1;
+
2347 }
+
2348
+
2349 k = j->spec_start;
+
2350 do {
+
2351 unsigned int zig;
+
2352 int c, r, s;
+
2353 if (j->code_bits < 16)
+ +
2355 c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS) - 1);
+
2356 r = fac[c];
+
2357 if (r) { // fast-AC path
+
2358 k += (r >> 4) & 15; // run
+
2359 s = r & 15; // combined length
+
2360 if (s > j->code_bits)
+
2361 return stbi__err("bad huffman code", "Combined length longer than code bits available");
+
2362 j->code_buffer <<= s;
+
2363 j->code_bits -= s;
+
2364 zig = stbi__jpeg_dezigzag[k++];
+
2365 data[zig] = (short)((r >> 8) * (1 << shift));
+
2366 } else {
+ +
2368 if (rs < 0)
+
2369 return stbi__err("bad huffman code", "Corrupt JPEG");
+
2370 s = rs & 15;
+
2371 r = rs >> 4;
+
2372 if (s == 0) {
+
2373 if (r < 15) {
+
2374 j->eob_run = (1 << r);
+
2375 if (r)
+
2376 j->eob_run += stbi__jpeg_get_bits(j, r);
+
2377 --j->eob_run;
+
2378 break;
+
2379 }
+
2380 k += 16;
+
2381 } else {
+
2382 k += r;
+
2383 zig = stbi__jpeg_dezigzag[k++];
+
2384 data[zig] = (short)(stbi__extend_receive(j, s) * (1 << shift));
+
2385 }
+
2386 }
+
2387 } while (k <= j->spec_end);
+
2388 } else {
+
2389 // refinement scan for these AC coefficients
+
2390
+
2391 short bit = (short)(1 << j->succ_low);
+
2392
+
2393 if (j->eob_run) {
+
2394 --j->eob_run;
+
2395 for (k = j->spec_start; k <= j->spec_end; ++k) {
+
2396 short * p = &data[stbi__jpeg_dezigzag[k]];
+
2397 if (*p != 0)
+
2398 if (stbi__jpeg_get_bit(j))
+
2399 if ((*p & bit) == 0) {
+
2400 if (*p > 0)
+
2401 *p += bit;
+
2402 else
+
2403 *p -= bit;
+
2404 }
+
2405 }
+
2406 } else {
+
2407 k = j->spec_start;
+
2408 do {
+
2409 int r, s;
+ +
2411 j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh
+
2412 if (rs < 0)
+
2413 return stbi__err("bad huffman code", "Corrupt JPEG");
+
2414 s = rs & 15;
+
2415 r = rs >> 4;
+
2416 if (s == 0) {
+
2417 if (r < 15) {
+
2418 j->eob_run = (1 << r) - 1;
+
2419 if (r)
+
2420 j->eob_run += stbi__jpeg_get_bits(j, r);
+
2421 r = 64; // force end of block
+
2422 } else {
+
2423 // r=15 s=0 should write 16 0s, so we just do
+
2424 // a run of 15 0s and then write s (which is 0),
+
2425 // so we don't have to do anything special here
+
2426 }
+
2427 } else {
+
2428 if (s != 1)
+
2429 return stbi__err("bad huffman code", "Corrupt JPEG");
+
2430 // sign bit
+
2431 if (stbi__jpeg_get_bit(j))
+
2432 s = bit;
+
2433 else
+
2434 s = -bit;
+
2435 }
+
2436
+
2437 // advance by r
+
2438 while (k <= j->spec_end) {
+
2439 short * p = &data[stbi__jpeg_dezigzag[k++]];
+
2440 if (*p != 0) {
+
2441 if (stbi__jpeg_get_bit(j))
+
2442 if ((*p & bit) == 0) {
+
2443 if (*p > 0)
+
2444 *p += bit;
+
2445 else
+
2446 *p -= bit;
+
2447 }
+
2448 } else {
+
2449 if (r == 0) {
+
2450 *p = (short)s;
+
2451 break;
+
2452 }
+
2453 --r;
+
2454 }
+
2455 }
+
2456 } while (k <= j->spec_end);
+
2457 }
+
2458 }
+
2459 return 1;
+
2460}
+
2461
+
2462// take a -128..127 value and stbi__clamp it and convert to 0..255
+
2463stbi_inline static stbi_uc stbi__clamp(int x) {
+
2464 // trick to use a single test to catch both cases
+
2465 if ((unsigned int)x > 255) {
+
2466 if (x < 0)
+
2467 return 0;
+
2468 if (x > 255)
+
2469 return 255;
+
2470 }
+
2471 return (stbi_uc)x;
+
2472}
+
2473
+
2474#define stbi__f2f(x) ((int)(((x)*4096 + 0.5)))
+
2475#define stbi__fsh(x) ((x)*4096)
+
2476
+
2477// derived from jidctint -- DCT_ISLOW
+
2478#define STBI__IDCT_1D(s0, s1, s2, s3, s4, s5, s6, s7) \
+
2479 int t0, t1, t2, t3, p1, p2, p3, p4, p5, x0, x1, x2, x3; \
+
2480 p2 = s2; \
+
2481 p3 = s6; \
+
2482 p1 = (p2 + p3) * stbi__f2f(0.5411961f); \
+
2483 t2 = p1 + p3 * stbi__f2f(-1.847759065f); \
+
2484 t3 = p1 + p2 * stbi__f2f(0.765366865f); \
+
2485 p2 = s0; \
+
2486 p3 = s4; \
+
2487 t0 = stbi__fsh(p2 + p3); \
+
2488 t1 = stbi__fsh(p2 - p3); \
+
2489 x0 = t0 + t3; \
+
2490 x3 = t0 - t3; \
+
2491 x1 = t1 + t2; \
+
2492 x2 = t1 - t2; \
+
2493 t0 = s7; \
+
2494 t1 = s5; \
+
2495 t2 = s3; \
+
2496 t3 = s1; \
+
2497 p3 = t0 + t2; \
+
2498 p4 = t1 + t3; \
+
2499 p1 = t0 + t3; \
+
2500 p2 = t1 + t2; \
+
2501 p5 = (p3 + p4) * stbi__f2f(1.175875602f); \
+
2502 t0 = t0 * stbi__f2f(0.298631336f); \
+
2503 t1 = t1 * stbi__f2f(2.053119869f); \
+
2504 t2 = t2 * stbi__f2f(3.072711026f); \
+
2505 t3 = t3 * stbi__f2f(1.501321110f); \
+
2506 p1 = p5 + p1 * stbi__f2f(-0.899976223f); \
+
2507 p2 = p5 + p2 * stbi__f2f(-2.562915447f); \
+
2508 p3 = p3 * stbi__f2f(-1.961570560f); \
+
2509 p4 = p4 * stbi__f2f(-0.390180644f); \
+
2510 t3 += p1 + p4; \
+
2511 t2 += p2 + p3; \
+
2512 t1 += p2 + p4; \
+
2513 t0 += p1 + p3;
+
2514
+
2515static void stbi__idct_block(stbi_uc * out, int out_stride, short data[64]) {
+
2516 int i, val[64], *v = val;
+
2517 stbi_uc * o;
+
2518 short * d = data;
+
2519
+
2520 // columns
+
2521 for (i = 0; i < 8; ++i, ++d, ++v) {
+
2522 // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
+
2523 if (d[8] == 0 && d[16] == 0 && d[24] == 0 && d[32] == 0 && d[40] == 0 && d[48] == 0 && d[56] == 0) {
+
2524 // no shortcut 0 seconds
+
2525 // (1|2|3|4|5|6|7)==0 0 seconds
+
2526 // all separate -0.047 seconds
+
2527 // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds
+
2528 int dcterm = d[0] * 4;
+
2529 v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
+
2530 } else {
+
2531 STBI__IDCT_1D(d[0], d[8], d[16], d[24], d[32], d[40], d[48], d[56])
+
2532 // constants scaled things up by 1<<12; let's bring them back
+
2533 // down, but keep 2 extra bits of precision
+
2534 x0 += 512;
+
2535 x1 += 512;
+
2536 x2 += 512;
+
2537 x3 += 512;
+
2538 v[0] = (x0 + t3) >> 10;
+
2539 v[56] = (x0 - t3) >> 10;
+
2540 v[8] = (x1 + t2) >> 10;
+
2541 v[48] = (x1 - t2) >> 10;
+
2542 v[16] = (x2 + t1) >> 10;
+
2543 v[40] = (x2 - t1) >> 10;
+
2544 v[24] = (x3 + t0) >> 10;
+
2545 v[32] = (x3 - t0) >> 10;
+
2546 }
+
2547 }
+
2548
+
2549 for (i = 0, v = val, o = out; i < 8; ++i, v += 8, o += out_stride) {
+
2550 // no fast case since the first 1D IDCT spread components out
+
2551 STBI__IDCT_1D(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7])
+
2552 // constants scaled things up by 1<<12, plus we had 1<<2 from first
+
2553 // loop, plus horizontal and vertical each scale by sqrt(8) so together
+
2554 // we've got an extra 1<<3, so 1<<17 total we need to remove.
+
2555 // so we want to round that, which means adding 0.5 * 1<<17,
+
2556 // aka 65536. Also, we'll end up with -128 to 127 that we want
+
2557 // to encode as 0..255 by adding 128, so we'll add that before the shift
+
2558 x0 += 65536 + (128 << 17);
+
2559 x1 += 65536 + (128 << 17);
+
2560 x2 += 65536 + (128 << 17);
+
2561 x3 += 65536 + (128 << 17);
+
2562 // tried computing the shifts into temps, or'ing the temps to see
+
2563 // if any were out of range, but that was slower
+
2564 o[0] = stbi__clamp((x0 + t3) >> 17);
+
2565 o[7] = stbi__clamp((x0 - t3) >> 17);
+
2566 o[1] = stbi__clamp((x1 + t2) >> 17);
+
2567 o[6] = stbi__clamp((x1 - t2) >> 17);
+
2568 o[2] = stbi__clamp((x2 + t1) >> 17);
+
2569 o[5] = stbi__clamp((x2 - t1) >> 17);
+
2570 o[3] = stbi__clamp((x3 + t0) >> 17);
+
2571 o[4] = stbi__clamp((x3 - t0) >> 17);
+
2572 }
+
2573}
+
2574
+
2575#ifdef STBI_SSE2
+
2576// sse2 integer IDCT. not the fastest possible implementation but it
+
2577// produces bit-identical results to the generic C version so it's
+
2578// fully "transparent".
+
2579static void stbi__idct_simd(stbi_uc * out, int out_stride, short data[64]) {
+
2580 // This is constructed to match our regular (generic) integer IDCT exactly.
+ +
2582 __m128i tmp;
+
2583
+
2584// dot product constant: even elems=x, odd elems=y
+
2585#define dct_const(x, y) _mm_setr_epi16((x), (y), (x), (y), (x), (y), (x), (y))
+
2586
+
2587// out(0) = c0[even]*x + c0[odd]*y (c0, x, y 16-bit, out 32-bit)
+
2588// out(1) = c1[even]*x + c1[odd]*y
+
2589#define dct_rot(out0, out1, x, y, c0, c1) \
+
2590 __m128i c0##lo = _mm_unpacklo_epi16((x), (y)); \
+
2591 __m128i c0##hi = _mm_unpackhi_epi16((x), (y)); \
+
2592 __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \
+
2593 __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \
+
2594 __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \
+
2595 __m128i out1##_h = _mm_madd_epi16(c0##hi, c1)
+
2596
+
2597// out = in << 12 (in 16-bit, out 32-bit)
+
2598#define dct_widen(out, in) \
+
2599 __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \
+
2600 __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4)
+
2601
+
2602// wide add
+
2603#define dct_wadd(out, a, b) \
+
2604 __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \
+
2605 __m128i out##_h = _mm_add_epi32(a##_h, b##_h)
+
2606
+
2607// wide sub
+
2608#define dct_wsub(out, a, b) \
+
2609 __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \
+
2610 __m128i out##_h = _mm_sub_epi32(a##_h, b##_h)
+
2611
+
2612// butterfly a/b, add bias, then shift by "s" and pack
+
2613#define dct_bfly32o(out0, out1, a, b, bias, s) \
+
2614 { \
+
2615 __m128i abiased_l = _mm_add_epi32(a##_l, bias); \
+
2616 __m128i abiased_h = _mm_add_epi32(a##_h, bias); \
+
2617 dct_wadd(sum, abiased, b); \
+
2618 dct_wsub(dif, abiased, b); \
+
2619 out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \
+
2620 out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \
+
2621 }
+
2622
+
2623// 8-bit interleave step (for transposes)
+
2624#define dct_interleave8(a, b) \
+
2625 tmp = a; \
+
2626 a = _mm_unpacklo_epi8(a, b); \
+
2627 b = _mm_unpackhi_epi8(tmp, b)
+
2628
+
2629// 16-bit interleave step (for transposes)
+
2630#define dct_interleave16(a, b) \
+
2631 tmp = a; \
+
2632 a = _mm_unpacklo_epi16(a, b); \
+
2633 b = _mm_unpackhi_epi16(tmp, b)
+
2634
+
2635#define dct_pass(bias, shift) \
+
2636 { \
+
2637 /* even part */ \
+
2638 dct_rot(t2e, t3e, row2, row6, rot0_0, rot0_1); \
+
2639 __m128i sum04 = _mm_add_epi16(row0, row4); \
+
2640 __m128i dif04 = _mm_sub_epi16(row0, row4); \
+
2641 dct_widen(t0e, sum04); \
+
2642 dct_widen(t1e, dif04); \
+
2643 dct_wadd(x0, t0e, t3e); \
+
2644 dct_wsub(x3, t0e, t3e); \
+
2645 dct_wadd(x1, t1e, t2e); \
+
2646 dct_wsub(x2, t1e, t2e); \
+
2647 /* odd part */ \
+
2648 dct_rot(y0o, y2o, row7, row3, rot2_0, rot2_1); \
+
2649 dct_rot(y1o, y3o, row5, row1, rot3_0, rot3_1); \
+
2650 __m128i sum17 = _mm_add_epi16(row1, row7); \
+
2651 __m128i sum35 = _mm_add_epi16(row3, row5); \
+
2652 dct_rot(y4o, y5o, sum17, sum35, rot1_0, rot1_1); \
+
2653 dct_wadd(x4, y0o, y4o); \
+
2654 dct_wadd(x5, y1o, y5o); \
+
2655 dct_wadd(x6, y2o, y5o); \
+
2656 dct_wadd(x7, y3o, y4o); \
+
2657 dct_bfly32o(row0, row7, x0, x7, bias, shift); \
+
2658 dct_bfly32o(row1, row6, x1, x6, bias, shift); \
+
2659 dct_bfly32o(row2, row5, x2, x5, bias, shift); \
+
2660 dct_bfly32o(row3, row4, x3, x4, bias, shift); \
+
2661 }
+
2662
+
2663 __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f));
+
2664 __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f(0.765366865f), stbi__f2f(0.5411961f));
+
2665 __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f));
+
2666 __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f));
+
2667 __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f(0.298631336f), stbi__f2f(-1.961570560f));
+
2668 __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f(3.072711026f));
+
2669 __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f(2.053119869f), stbi__f2f(-0.390180644f));
+
2670 __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f(1.501321110f));
+
2671
+
2672 // rounding biases in column/row passes, see stbi__idct_block for explanation.
+ +
2674 __m128i bias_1 = _mm_set1_epi32(65536 + (128 << 17));
+
2675
+
2676 // load
+
2677 row0 = _mm_load_si128((const __m128i *)(data + 0 * 8));
+
2678 row1 = _mm_load_si128((const __m128i *)(data + 1 * 8));
+
2679 row2 = _mm_load_si128((const __m128i *)(data + 2 * 8));
+
2680 row3 = _mm_load_si128((const __m128i *)(data + 3 * 8));
+
2681 row4 = _mm_load_si128((const __m128i *)(data + 4 * 8));
+
2682 row5 = _mm_load_si128((const __m128i *)(data + 5 * 8));
+
2683 row6 = _mm_load_si128((const __m128i *)(data + 6 * 8));
+
2684 row7 = _mm_load_si128((const __m128i *)(data + 7 * 8));
+
2685
+
2686 // column pass
+
2687 dct_pass(bias_0, 10);
+
2688
+
2689 {
+
2690 // 16bit 8x8 transpose pass 1
+ + + + +
2695
+
2696 // transpose pass 2
+ + + + +
2701
+
2702 // transpose pass 3
+ + + + +
2707 }
+
2708
+
2709 // row pass
+
2710 dct_pass(bias_1, 17);
+
2711
+
2712 {
+
2713 // pack
+
2714 __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7
+ + + +
2718
+
2719 // 8bit 8x8 transpose pass 1
+
2720 dct_interleave8(p0, p2); // a0e0a1e1...
+
2721 dct_interleave8(p1, p3); // c0g0c1g1...
+
2722
+
2723 // transpose pass 2
+
2724 dct_interleave8(p0, p1); // a0c0e0g0...
+
2725 dct_interleave8(p2, p3); // b0d0f0h0...
+
2726
+
2727 // transpose pass 3
+
2728 dct_interleave8(p0, p2); // a0b0c0d0...
+
2729 dct_interleave8(p1, p3); // a4b4c4d4...
+
2730
+
2731 // store
+ +
2733 out += out_stride;
+ +
2735 out += out_stride;
+ +
2737 out += out_stride;
+ +
2739 out += out_stride;
+ +
2741 out += out_stride;
+ +
2743 out += out_stride;
+ +
2745 out += out_stride;
+ +
2747 }
+
2748
+
2749#undef dct_const
+
2750#undef dct_rot
+
2751#undef dct_widen
+
2752#undef dct_wadd
+
2753#undef dct_wsub
+
2754#undef dct_bfly32o
+
2755#undef dct_interleave8
+
2756#undef dct_interleave16
+
2757#undef dct_pass
+
2758}
+
2759
+
2760#endif // STBI_SSE2
+
2761
+
2762#ifdef STBI_NEON
+
2763
+
2764// NEON integer IDCT. should produce bit-identical
+
2765// results to the generic C version.
+
2766static void stbi__idct_simd(stbi_uc * out, int out_stride, short data[64]) {
+ +
2768
+
2769 int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f));
+
2770 int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f));
+
2771 int16x4_t rot0_2 = vdup_n_s16(stbi__f2f(0.765366865f));
+
2772 int16x4_t rot1_0 = vdup_n_s16(stbi__f2f(1.175875602f));
+
2773 int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f));
+
2774 int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f));
+
2775 int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f));
+
2776 int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f));
+
2777 int16x4_t rot3_0 = vdup_n_s16(stbi__f2f(0.298631336f));
+
2778 int16x4_t rot3_1 = vdup_n_s16(stbi__f2f(2.053119869f));
+
2779 int16x4_t rot3_2 = vdup_n_s16(stbi__f2f(3.072711026f));
+
2780 int16x4_t rot3_3 = vdup_n_s16(stbi__f2f(1.501321110f));
+
2781
+
2782#define dct_long_mul(out, inq, coeff) \
+
2783 int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \
+
2784 int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff)
+
2785
+
2786#define dct_long_mac(out, acc, inq, coeff) \
+
2787 int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \
+
2788 int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff)
+
2789
+
2790#define dct_widen(out, inq) \
+
2791 int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \
+
2792 int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12)
+
2793
+
2794// wide add
+
2795#define dct_wadd(out, a, b) \
+
2796 int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \
+
2797 int32x4_t out##_h = vaddq_s32(a##_h, b##_h)
+
2798
+
2799// wide sub
+
2800#define dct_wsub(out, a, b) \
+
2801 int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \
+
2802 int32x4_t out##_h = vsubq_s32(a##_h, b##_h)
+
2803
+
2804// butterfly a/b, then shift using "shiftop" by "s" and pack
+
2805#define dct_bfly32o(out0, out1, a, b, shiftop, s) \
+
2806 { \
+
2807 dct_wadd(sum, a, b); \
+
2808 dct_wsub(dif, a, b); \
+
2809 out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \
+
2810 out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \
+
2811 }
+
2812
+
2813#define dct_pass(shiftop, shift) \
+
2814 { \
+
2815 /* even part */ \
+
2816 int16x8_t sum26 = vaddq_s16(row2, row6); \
+
2817 dct_long_mul(p1e, sum26, rot0_0); \
+
2818 dct_long_mac(t2e, p1e, row6, rot0_1); \
+
2819 dct_long_mac(t3e, p1e, row2, rot0_2); \
+
2820 int16x8_t sum04 = vaddq_s16(row0, row4); \
+
2821 int16x8_t dif04 = vsubq_s16(row0, row4); \
+
2822 dct_widen(t0e, sum04); \
+
2823 dct_widen(t1e, dif04); \
+
2824 dct_wadd(x0, t0e, t3e); \
+
2825 dct_wsub(x3, t0e, t3e); \
+
2826 dct_wadd(x1, t1e, t2e); \
+
2827 dct_wsub(x2, t1e, t2e); \
+
2828 /* odd part */ \
+
2829 int16x8_t sum15 = vaddq_s16(row1, row5); \
+
2830 int16x8_t sum17 = vaddq_s16(row1, row7); \
+
2831 int16x8_t sum35 = vaddq_s16(row3, row5); \
+
2832 int16x8_t sum37 = vaddq_s16(row3, row7); \
+
2833 int16x8_t sumodd = vaddq_s16(sum17, sum35); \
+
2834 dct_long_mul(p5o, sumodd, rot1_0); \
+
2835 dct_long_mac(p1o, p5o, sum17, rot1_1); \
+
2836 dct_long_mac(p2o, p5o, sum35, rot1_2); \
+
2837 dct_long_mul(p3o, sum37, rot2_0); \
+
2838 dct_long_mul(p4o, sum15, rot2_1); \
+
2839 dct_wadd(sump13o, p1o, p3o); \
+
2840 dct_wadd(sump24o, p2o, p4o); \
+
2841 dct_wadd(sump23o, p2o, p3o); \
+
2842 dct_wadd(sump14o, p1o, p4o); \
+
2843 dct_long_mac(x4, sump13o, row7, rot3_0); \
+
2844 dct_long_mac(x5, sump24o, row5, rot3_1); \
+
2845 dct_long_mac(x6, sump23o, row3, rot3_2); \
+
2846 dct_long_mac(x7, sump14o, row1, rot3_3); \
+
2847 dct_bfly32o(row0, row7, x0, x7, shiftop, shift); \
+
2848 dct_bfly32o(row1, row6, x1, x6, shiftop, shift); \
+
2849 dct_bfly32o(row2, row5, x2, x5, shiftop, shift); \
+
2850 dct_bfly32o(row3, row4, x3, x4, shiftop, shift); \
+
2851 }
+
2852
+
2853 // load
+
2854 row0 = vld1q_s16(data + 0 * 8);
+
2855 row1 = vld1q_s16(data + 1 * 8);
+
2856 row2 = vld1q_s16(data + 2 * 8);
+
2857 row3 = vld1q_s16(data + 3 * 8);
+
2858 row4 = vld1q_s16(data + 4 * 8);
+
2859 row5 = vld1q_s16(data + 5 * 8);
+
2860 row6 = vld1q_s16(data + 6 * 8);
+
2861 row7 = vld1q_s16(data + 7 * 8);
+
2862
+
2863 // add DC bias
+
2864 row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0));
+
2865
+
2866 // column pass
+ +
2868
+
2869 // 16bit 8x8 transpose
+
2870 {
+
2871// these three map to a single VTRN.16, VTRN.32, and VSWP, respectively.
+
2872// whether compilers actually get this is another story, sadly.
+
2873#define dct_trn16(x, y) \
+
2874 { \
+
2875 int16x8x2_t t = vtrnq_s16(x, y); \
+
2876 x = t.val[0]; \
+
2877 y = t.val[1]; \
+
2878 }
+
2879#define dct_trn32(x, y) \
+
2880 { \
+
2881 int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); \
+
2882 x = vreinterpretq_s16_s32(t.val[0]); \
+
2883 y = vreinterpretq_s16_s32(t.val[1]); \
+
2884 }
+
2885#define dct_trn64(x, y) \
+
2886 { \
+
2887 int16x8_t x0 = x; \
+
2888 int16x8_t y0 = y; \
+
2889 x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); \
+
2890 y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); \
+
2891 }
+
2892
+
2893 // pass 1
+
2894 dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6
+ + + +
2898
+
2899 // pass 2
+
2900 dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4
+ + + +
2904
+
2905 // pass 3
+
2906 dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0
+ + + +
2910
+
2911#undef dct_trn16
+
2912#undef dct_trn32
+
2913#undef dct_trn64
+
2914 }
+
2915
+
2916 // row pass
+
2917 // vrshrn_n_s32 only supports shifts up to 16, we need
+
2918 // 17. so do a non-rounding shift of 16 first then follow
+
2919 // up with a rounding shift by 1.
+
2920 dct_pass(vshrn_n_s32, 16);
+
2921
+
2922 {
+
2923 // pack and round
+ + + + + + + + +
2932
+
2933 // again, these can translate into one instruction, but often don't.
+
2934#define dct_trn8_8(x, y) \
+
2935 { \
+
2936 uint8x8x2_t t = vtrn_u8(x, y); \
+
2937 x = t.val[0]; \
+
2938 y = t.val[1]; \
+
2939 }
+
2940#define dct_trn8_16(x, y) \
+
2941 { \
+
2942 uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); \
+
2943 x = vreinterpret_u8_u16(t.val[0]); \
+
2944 y = vreinterpret_u8_u16(t.val[1]); \
+
2945 }
+
2946#define dct_trn8_32(x, y) \
+
2947 { \
+
2948 uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); \
+
2949 x = vreinterpret_u8_u32(t.val[0]); \
+
2950 y = vreinterpret_u8_u32(t.val[1]); \
+
2951 }
+
2952
+
2953 // sadly can't use interleaved stores here since we only write
+
2954 // 8 bytes to each scan line!
+
2955
+
2956 // 8x8 8-bit transpose pass 1
+
2957 dct_trn8_8(p0, p1);
+
2958 dct_trn8_8(p2, p3);
+
2959 dct_trn8_8(p4, p5);
+
2960 dct_trn8_8(p6, p7);
+
2961
+
2962 // pass 2
+
2963 dct_trn8_16(p0, p2);
+
2964 dct_trn8_16(p1, p3);
+
2965 dct_trn8_16(p4, p6);
+
2966 dct_trn8_16(p5, p7);
+
2967
+
2968 // pass 3
+
2969 dct_trn8_32(p0, p4);
+
2970 dct_trn8_32(p1, p5);
+
2971 dct_trn8_32(p2, p6);
+
2972 dct_trn8_32(p3, p7);
+
2973
+
2974 // store
+
2975 vst1_u8(out, p0);
+
2976 out += out_stride;
+
2977 vst1_u8(out, p1);
+
2978 out += out_stride;
+
2979 vst1_u8(out, p2);
+
2980 out += out_stride;
+
2981 vst1_u8(out, p3);
+
2982 out += out_stride;
+
2983 vst1_u8(out, p4);
+
2984 out += out_stride;
+
2985 vst1_u8(out, p5);
+
2986 out += out_stride;
+
2987 vst1_u8(out, p6);
+
2988 out += out_stride;
+
2989 vst1_u8(out, p7);
+
2990
+
2991#undef dct_trn8_8
+
2992#undef dct_trn8_16
+
2993#undef dct_trn8_32
+
2994 }
+
2995
+
2996#undef dct_long_mul
+
2997#undef dct_long_mac
+
2998#undef dct_widen
+
2999#undef dct_wadd
+
3000#undef dct_wsub
+
3001#undef dct_bfly32o
+
3002#undef dct_pass
+
3003}
+
3004
+
3005#endif // STBI_NEON
+
3006
+
3007#define STBI__MARKER_none 0xff
+
3008// if there's a pending marker from the entropy stream, return that
+
3009// otherwise, fetch from the stream and get a marker. if there's no
+
3010// marker, return 0xff, which is never a valid marker value
+ +
3012 stbi_uc x;
+
3013 if (j->marker != STBI__MARKER_none) {
+
3014 x = j->marker;
+
3015 j->marker = STBI__MARKER_none;
+
3016 return x;
+
3017 }
+
3018 x = stbi__get8(j->s);
+
3019 if (x != 0xff)
+
3020 return STBI__MARKER_none;
+
3021 while (x == 0xff)
+
3022 x = stbi__get8(j->s); // consume repeated 0xff fill bytes
+
3023 return x;
+
3024}
+
3025
+
3026// in each scan, we'll have scan_n components, and the order
+
3027// of the components is specified by order[]
+
3028#define STBI__RESTART(x) ((x) >= 0xd0 && (x) <= 0xd7)
+
3029
+
3030// after a restart interval, stbi__jpeg_reset the entropy decoder and
+
3031// the dc prediction
+
3032static void stbi__jpeg_reset(stbi__jpeg * j) {
+
3033 j->code_bits = 0;
+
3034 j->code_buffer = 0;
+
3035 j->nomore = 0;
+
3036 j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = j->img_comp[3].dc_pred = 0;
+
3037 j->marker = STBI__MARKER_none;
+
3038 j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;
+
3039 j->eob_run = 0;
+
3040 // no more than 1<<31 MCUs if no restart_interal? that's plenty safe,
+
3041 // since we don't even allow 1<<30 pixels
+
3042}
+
3043
+ + +
3046 if (!z->progressive) {
+
3047 if (z->scan_n == 1) {
+
3048 int i, j;
+
3049 STBI_SIMD_ALIGN(short, data[64]);
+
3050 int n = z->order[0];
+
3051 // non-interleaved data, we just need to process one block at a time,
+
3052 // in trivial scanline order
+
3053 // number of blocks to do just depends on how many actual "pixels" this
+
3054 // component has, independent of interleaved MCU blocking and such
+
3055 int w = (z->img_comp[n].x + 7) >> 3;
+
3056 int h = (z->img_comp[n].y + 7) >> 3;
+
3057 for (j = 0; j < h; ++j) {
+
3058 for (i = 0; i < w; ++i) {
+
3059 int ha = z->img_comp[n].ha;
+
3060 if (!stbi__jpeg_decode_block(z, data, z->huff_dc + z->img_comp[n].hd, z->huff_ac + ha, z->fast_ac[ha], n,
+
3061 z->dequant[z->img_comp[n].tq]))
+
3062 return 0;
+
3063 z->idct_block_kernel(z->img_comp[n].data + z->img_comp[n].w2 * j * 8 + i * 8, z->img_comp[n].w2, data);
+
3064 // every data block is an MCU, so countdown the restart interval
+
3065 if (--z->todo <= 0) {
+
3066 if (z->code_bits < 24)
+ +
3068 // if it's NOT a restart, then just bail, so we get corrupt data
+
3069 // rather than no data
+
3070 if (!STBI__RESTART(z->marker))
+
3071 return 1;
+ +
3073 }
+
3074 }
+
3075 }
+
3076 return 1;
+
3077 } else { // interleaved
+
3078 int i, j, k, x, y;
+
3079 STBI_SIMD_ALIGN(short, data[64]);
+
3080 for (j = 0; j < z->img_mcu_y; ++j) {
+
3081 for (i = 0; i < z->img_mcu_x; ++i) {
+
3082 // scan an interleaved mcu... process scan_n components in order
+
3083 for (k = 0; k < z->scan_n; ++k) {
+
3084 int n = z->order[k];
+
3085 // scan out an mcu's worth of this component; that's just determined
+
3086 // by the basic H and V specified for the component
+
3087 for (y = 0; y < z->img_comp[n].v; ++y) {
+
3088 for (x = 0; x < z->img_comp[n].h; ++x) {
+
3089 int x2 = (i * z->img_comp[n].h + x) * 8;
+
3090 int y2 = (j * z->img_comp[n].v + y) * 8;
+
3091 int ha = z->img_comp[n].ha;
+
3092 if (!stbi__jpeg_decode_block(z, data, z->huff_dc + z->img_comp[n].hd, z->huff_ac + ha,
+
3093 z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq]))
+
3094 return 0;
+
3095 z->idct_block_kernel(z->img_comp[n].data + z->img_comp[n].w2 * y2 + x2, z->img_comp[n].w2,
+
3096 data);
+
3097 }
+
3098 }
+
3099 }
+
3100 // after all interleaved components, that's an interleaved MCU,
+
3101 // so now count down the restart interval
+
3102 if (--z->todo <= 0) {
+
3103 if (z->code_bits < 24)
+ +
3105 if (!STBI__RESTART(z->marker))
+
3106 return 1;
+ +
3108 }
+
3109 }
+
3110 }
+
3111 return 1;
+
3112 }
+
3113 } else {
+
3114 if (z->scan_n == 1) {
+
3115 int i, j;
+
3116 int n = z->order[0];
+
3117 // non-interleaved data, we just need to process one block at a time,
+
3118 // in trivial scanline order
+
3119 // number of blocks to do just depends on how many actual "pixels" this
+
3120 // component has, independent of interleaved MCU blocking and such
+
3121 int w = (z->img_comp[n].x + 7) >> 3;
+
3122 int h = (z->img_comp[n].y + 7) >> 3;
+
3123 for (j = 0; j < h; ++j) {
+
3124 for (i = 0; i < w; ++i) {
+
3125 short * data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
+
3126 if (z->spec_start == 0) {
+
3127 if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
+
3128 return 0;
+
3129 } else {
+
3130 int ha = z->img_comp[n].ha;
+
3131 if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha]))
+
3132 return 0;
+
3133 }
+
3134 // every data block is an MCU, so countdown the restart interval
+
3135 if (--z->todo <= 0) {
+
3136 if (z->code_bits < 24)
+ +
3138 if (!STBI__RESTART(z->marker))
+
3139 return 1;
+ +
3141 }
+
3142 }
+
3143 }
+
3144 return 1;
+
3145 } else { // interleaved
+
3146 int i, j, k, x, y;
+
3147 for (j = 0; j < z->img_mcu_y; ++j) {
+
3148 for (i = 0; i < z->img_mcu_x; ++i) {
+
3149 // scan an interleaved mcu... process scan_n components in order
+
3150 for (k = 0; k < z->scan_n; ++k) {
+
3151 int n = z->order[k];
+
3152 // scan out an mcu's worth of this component; that's just determined
+
3153 // by the basic H and V specified for the component
+
3154 for (y = 0; y < z->img_comp[n].v; ++y) {
+
3155 for (x = 0; x < z->img_comp[n].h; ++x) {
+
3156 int x2 = (i * z->img_comp[n].h + x);
+
3157 int y2 = (j * z->img_comp[n].v + y);
+
3158 short * data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w);
+
3159 if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
+
3160 return 0;
+
3161 }
+
3162 }
+
3163 }
+
3164 // after all interleaved components, that's an interleaved MCU,
+
3165 // so now count down the restart interval
+
3166 if (--z->todo <= 0) {
+
3167 if (z->code_bits < 24)
+ +
3169 if (!STBI__RESTART(z->marker))
+
3170 return 1;
+ +
3172 }
+
3173 }
+
3174 }
+
3175 return 1;
+
3176 }
+
3177 }
+
3178}
+
3179
+
3180static void stbi__jpeg_dequantize(short * data, stbi__uint16 * dequant) {
+
3181 int i;
+
3182 for (i = 0; i < 64; ++i)
+
3183 data[i] *= dequant[i];
+
3184}
+
3185
+
3186static void stbi__jpeg_finish(stbi__jpeg * z) {
+
3187 if (z->progressive) {
+
3188 // dequantize and idct the data
+
3189 int i, j, n;
+
3190 for (n = 0; n < z->s->img_n; ++n) {
+
3191 int w = (z->img_comp[n].x + 7) >> 3;
+
3192 int h = (z->img_comp[n].y + 7) >> 3;
+
3193 for (j = 0; j < h; ++j) {
+
3194 for (i = 0; i < w; ++i) {
+
3195 short * data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
+
3196 stbi__jpeg_dequantize(data, z->dequant[z->img_comp[n].tq]);
+
3197 z->idct_block_kernel(z->img_comp[n].data + z->img_comp[n].w2 * j * 8 + i * 8, z->img_comp[n].w2, data);
+
3198 }
+
3199 }
+
3200 }
+
3201 }
+
3202}
+
3203
+
3204static int stbi__process_marker(stbi__jpeg * z, int m) {
+
3205 int L;
+
3206 switch (m) {
+
3207 case STBI__MARKER_none: // no marker found
+
3208 return stbi__err("expected marker", "Corrupt JPEG");
+
3209
+
3210 case 0xDD: // DRI - specify restart interval
+
3211 if (stbi__get16be(z->s) != 4)
+
3212 return stbi__err("bad DRI len", "Corrupt JPEG");
+
3213 z->restart_interval = stbi__get16be(z->s);
+
3214 return 1;
+
3215
+
3216 case 0xDB: // DQT - define quantization table
+
3217 L = stbi__get16be(z->s) - 2;
+
3218 while (L > 0) {
+
3219 int q = stbi__get8(z->s);
+
3220 int p = q >> 4, sixteen = (p != 0);
+
3221 int t = q & 15, i;
+
3222 if (p != 0 && p != 1)
+
3223 return stbi__err("bad DQT type", "Corrupt JPEG");
+
3224 if (t > 3)
+
3225 return stbi__err("bad DQT table", "Corrupt JPEG");
+
3226
+
3227 for (i = 0; i < 64; ++i)
+ +
3229 L -= (sixteen ? 129 : 65);
+
3230 }
+
3231 return L == 0;
+
3232
+
3233 case 0xC4: // DHT - define huffman table
+
3234 L = stbi__get16be(z->s) - 2;
+
3235 while (L > 0) {
+
3236 stbi_uc * v;
+
3237 int sizes[16], i, n = 0;
+
3238 int q = stbi__get8(z->s);
+
3239 int tc = q >> 4;
+
3240 int th = q & 15;
+
3241 if (tc > 1 || th > 3)
+
3242 return stbi__err("bad DHT header", "Corrupt JPEG");
+
3243 for (i = 0; i < 16; ++i) {
+
3244 sizes[i] = stbi__get8(z->s);
+
3245 n += sizes[i];
+
3246 }
+
3247 if (n > 256)
+
3248 return stbi__err("bad DHT header", "Corrupt JPEG"); // Loop over i < n would write past end of values!
+
3249 L -= 17;
+
3250 if (tc == 0) {
+
3251 if (!stbi__build_huffman(z->huff_dc + th, sizes))
+
3252 return 0;
+
3253 v = z->huff_dc[th].values;
+
3254 } else {
+
3255 if (!stbi__build_huffman(z->huff_ac + th, sizes))
+
3256 return 0;
+
3257 v = z->huff_ac[th].values;
+
3258 }
+
3259 for (i = 0; i < n; ++i)
+
3260 v[i] = stbi__get8(z->s);
+
3261 if (tc != 0)
+
3262 stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th);
+
3263 L -= n;
+
3264 }
+
3265 return L == 0;
+
3266 }
+
3267
+
3268 // check for comment block or APP blocks
+
3269 if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
+
3270 L = stbi__get16be(z->s);
+
3271 if (L < 2) {
+
3272 if (m == 0xFE)
+
3273 return stbi__err("bad COM len", "Corrupt JPEG");
+
3274 else
+
3275 return stbi__err("bad APP len", "Corrupt JPEG");
+
3276 }
+
3277 L -= 2;
+
3278
+
3279 if (m == 0xE0 && L >= 5) { // JFIF APP0 segment
+
3280 static const unsigned char tag[5] = {'J', 'F', 'I', 'F', '\0'};
+
3281 int ok = 1;
+
3282 int i;
+
3283 for (i = 0; i < 5; ++i)
+
3284 if (stbi__get8(z->s) != tag[i])
+
3285 ok = 0;
+
3286 L -= 5;
+
3287 if (ok)
+
3288 z->jfif = 1;
+
3289 } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment
+
3290 static const unsigned char tag[6] = {'A', 'd', 'o', 'b', 'e', '\0'};
+
3291 int ok = 1;
+
3292 int i;
+
3293 for (i = 0; i < 6; ++i)
+
3294 if (stbi__get8(z->s) != tag[i])
+
3295 ok = 0;
+
3296 L -= 6;
+
3297 if (ok) {
+
3298 stbi__get8(z->s); // version
+
3299 stbi__get16be(z->s); // flags0
+
3300 stbi__get16be(z->s); // flags1
+
3301 z->app14_color_transform = stbi__get8(z->s); // color transform
+
3302 L -= 6;
+
3303 }
+
3304 }
+
3305
+
3306 stbi__skip(z->s, L);
+
3307 return 1;
+
3308 }
+
3309
+
3310 return stbi__err("unknown marker", "Corrupt JPEG");
+
3311}
+
3312
+
3313// after we see SOS
+ +
3315 int i;
+
3316 int Ls = stbi__get16be(z->s);
+
3317 z->scan_n = stbi__get8(z->s);
+
3318 if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int)z->s->img_n)
+
3319 return stbi__err("bad SOS component count", "Corrupt JPEG");
+
3320 if (Ls != 6 + 2 * z->scan_n)
+
3321 return stbi__err("bad SOS len", "Corrupt JPEG");
+
3322 for (i = 0; i < z->scan_n; ++i) {
+
3323 int id = stbi__get8(z->s), which;
+
3324 int q = stbi__get8(z->s);
+
3325 for (which = 0; which < z->s->img_n; ++which)
+
3326 if (z->img_comp[which].id == id)
+
3327 break;
+
3328 if (which == z->s->img_n)
+
3329 return 0; // no match
+
3330 z->img_comp[which].hd = q >> 4;
+
3331 if (z->img_comp[which].hd > 3)
+
3332 return stbi__err("bad DC huff", "Corrupt JPEG");
+
3333 z->img_comp[which].ha = q & 15;
+
3334 if (z->img_comp[which].ha > 3)
+
3335 return stbi__err("bad AC huff", "Corrupt JPEG");
+
3336 z->order[i] = which;
+
3337 }
+
3338
+
3339 {
+
3340 int aa;
+
3341 z->spec_start = stbi__get8(z->s);
+
3342 z->spec_end = stbi__get8(z->s); // should be 63, but might be 0
+
3343 aa = stbi__get8(z->s);
+
3344 z->succ_high = (aa >> 4);
+
3345 z->succ_low = (aa & 15);
+
3346 if (z->progressive) {
+
3347 if (z->spec_start > 63 || z->spec_end > 63 || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13)
+
3348 return stbi__err("bad SOS", "Corrupt JPEG");
+
3349 } else {
+
3350 if (z->spec_start != 0)
+
3351 return stbi__err("bad SOS", "Corrupt JPEG");
+
3352 if (z->succ_high != 0 || z->succ_low != 0)
+
3353 return stbi__err("bad SOS", "Corrupt JPEG");
+
3354 z->spec_end = 63;
+
3355 }
+
3356 }
+
3357
+
3358 return 1;
+
3359}
+
3360
+
3361static int stbi__free_jpeg_components(stbi__jpeg * z, int ncomp, int why) {
+
3362 int i;
+
3363 for (i = 0; i < ncomp; ++i) {
+
3364 if (z->img_comp[i].raw_data) {
+
3365 STBI_FREE(z->img_comp[i].raw_data);
+
3366 z->img_comp[i].raw_data = NULL;
+
3367 z->img_comp[i].data = NULL;
+
3368 }
+
3369 if (z->img_comp[i].raw_coeff) {
+
3370 STBI_FREE(z->img_comp[i].raw_coeff);
+
3371 z->img_comp[i].raw_coeff = 0;
+
3372 z->img_comp[i].coeff = 0;
+
3373 }
+
3374 if (z->img_comp[i].linebuf) {
+
3375 STBI_FREE(z->img_comp[i].linebuf);
+
3376 z->img_comp[i].linebuf = NULL;
+
3377 }
+
3378 }
+
3379 return why;
+
3380}
+
3381
+
3382static int stbi__process_frame_header(stbi__jpeg * z, int scan) {
+
3383 stbi__context * s = z->s;
+
3384 int Lf, p, i, q, h_max = 1, v_max = 1, c;
+
3385 Lf = stbi__get16be(s);
+
3386 if (Lf < 11)
+
3387 return stbi__err("bad SOF len", "Corrupt JPEG"); // JPEG
+
3388 p = stbi__get8(s);
+
3389 if (p != 8)
+
3390 return stbi__err("only 8-bit", "JPEG format not supported: 8-bit only"); // JPEG baseline
+
3391 s->img_y = stbi__get16be(s);
+
3392 if (s->img_y == 0)
+
3393 return stbi__err("no header height",
+
3394 "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG
+
3395 s->img_x = stbi__get16be(s);
+
3396 if (s->img_x == 0)
+
3397 return stbi__err("0 width", "Corrupt JPEG"); // JPEG requires
+
3398 if (s->img_y > STBI_MAX_DIMENSIONS)
+
3399 return stbi__err("too large", "Very large image (corrupt?)");
+
3400 if (s->img_x > STBI_MAX_DIMENSIONS)
+
3401 return stbi__err("too large", "Very large image (corrupt?)");
+
3402 c = stbi__get8(s);
+
3403 if (c != 3 && c != 1 && c != 4)
+
3404 return stbi__err("bad component count", "Corrupt JPEG");
+
3405 s->img_n = c;
+
3406 for (i = 0; i < c; ++i) {
+
3407 z->img_comp[i].data = NULL;
+
3408 z->img_comp[i].linebuf = NULL;
+
3409 }
+
3410
+
3411 if (Lf != 8 + 3 * s->img_n)
+
3412 return stbi__err("bad SOF len", "Corrupt JPEG");
+
3413
+
3414 z->rgb = 0;
+
3415 for (i = 0; i < s->img_n; ++i) {
+
3416 static const unsigned char rgb[3] = {'R', 'G', 'B'};
+
3417 z->img_comp[i].id = stbi__get8(s);
+
3418 if (s->img_n == 3 && z->img_comp[i].id == rgb[i])
+
3419 ++z->rgb;
+
3420 q = stbi__get8(s);
+
3421 z->img_comp[i].h = (q >> 4);
+
3422 if (!z->img_comp[i].h || z->img_comp[i].h > 4)
+
3423 return stbi__err("bad H", "Corrupt JPEG");
+
3424 z->img_comp[i].v = q & 15;
+
3425 if (!z->img_comp[i].v || z->img_comp[i].v > 4)
+
3426 return stbi__err("bad V", "Corrupt JPEG");
+
3427 z->img_comp[i].tq = stbi__get8(s);
+
3428 if (z->img_comp[i].tq > 3)
+
3429 return stbi__err("bad TQ", "Corrupt JPEG");
+
3430 }
+
3431
+
3432 if (scan != STBI__SCAN_load)
+
3433 return 1;
+
3434
+
3435 if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0))
+
3436 return stbi__err("too large", "Image too large to decode");
+
3437
+
3438 for (i = 0; i < s->img_n; ++i) {
+
3439 if (z->img_comp[i].h > h_max)
+
3440 h_max = z->img_comp[i].h;
+
3441 if (z->img_comp[i].v > v_max)
+
3442 v_max = z->img_comp[i].v;
+
3443 }
+
3444
+
3445 // check that plane subsampling factors are integer ratios; our resamplers can't deal with fractional ratios
+
3446 // and I've never seen a non-corrupted JPEG file actually use them
+
3447 for (i = 0; i < s->img_n; ++i) {
+
3448 if (h_max % z->img_comp[i].h != 0)
+
3449 return stbi__err("bad H", "Corrupt JPEG");
+
3450 if (v_max % z->img_comp[i].v != 0)
+
3451 return stbi__err("bad V", "Corrupt JPEG");
+
3452 }
+
3453
+
3454 // compute interleaved mcu info
+
3455 z->img_h_max = h_max;
+
3456 z->img_v_max = v_max;
+
3457 z->img_mcu_w = h_max * 8;
+
3458 z->img_mcu_h = v_max * 8;
+
3459 // these sizes can't be more than 17 bits
+
3460 z->img_mcu_x = (s->img_x + z->img_mcu_w - 1) / z->img_mcu_w;
+
3461 z->img_mcu_y = (s->img_y + z->img_mcu_h - 1) / z->img_mcu_h;
+
3462
+
3463 for (i = 0; i < s->img_n; ++i) {
+
3464 // number of effective pixels (e.g. for non-interleaved MCU)
+
3465 z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max - 1) / h_max;
+
3466 z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max - 1) / v_max;
+
3467 // to simplify generation, we'll allocate enough memory to decode
+
3468 // the bogus oversized data from using interleaved MCUs and their
+
3469 // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
+
3470 // discard the extra data until colorspace conversion
+
3471 //
+
3472 // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier)
+
3473 // so these muls can't overflow with 32-bit ints (which we require)
+
3474 z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
+
3475 z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
+
3476 z->img_comp[i].coeff = 0;
+
3477 z->img_comp[i].raw_coeff = 0;
+
3478 z->img_comp[i].linebuf = NULL;
+
3479 z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15);
+
3480 if (z->img_comp[i].raw_data == NULL)
+
3481 return stbi__free_jpeg_components(z, i + 1, stbi__err("outofmem", "Out of memory"));
+
3482 // align blocks for idct using mmx/sse
+
3483 z->img_comp[i].data = (stbi_uc *)(((size_t)z->img_comp[i].raw_data + 15) & ~15);
+
3484 if (z->progressive) {
+
3485 // w2, h2 are multiples of 8 (see above)
+
3486 z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8;
+
3487 z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8;
+
3488 z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15);
+
3489 if (z->img_comp[i].raw_coeff == NULL)
+
3490 return stbi__free_jpeg_components(z, i + 1, stbi__err("outofmem", "Out of memory"));
+
3491 z->img_comp[i].coeff = (short *)(((size_t)z->img_comp[i].raw_coeff + 15) & ~15);
+
3492 }
+
3493 }
+
3494
+
3495 return 1;
+
3496}
+
3497
+
3498// use comparisons since in some cases we handle more than one case (e.g. SOF)
+
3499#define stbi__DNL(x) ((x) == 0xdc)
+
3500#define stbi__SOI(x) ((x) == 0xd8)
+
3501#define stbi__EOI(x) ((x) == 0xd9)
+
3502#define stbi__SOF(x) ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2)
+
3503#define stbi__SOS(x) ((x) == 0xda)
+
3504
+
3505#define stbi__SOF_progressive(x) ((x) == 0xc2)
+
3506
+
3507static int stbi__decode_jpeg_header(stbi__jpeg * z, int scan) {
+
3508 int m;
+
3509 z->jfif = 0;
+
3510 z->app14_color_transform = -1; // valid values are 0,1,2
+
3511 z->marker = STBI__MARKER_none; // initialize cached marker to empty
+
3512 m = stbi__get_marker(z);
+
3513 if (!stbi__SOI(m))
+
3514 return stbi__err("no SOI", "Corrupt JPEG");
+
3515 if (scan == STBI__SCAN_type)
+
3516 return 1;
+
3517 m = stbi__get_marker(z);
+
3518 while (!stbi__SOF(m)) {
+
3519 if (!stbi__process_marker(z, m))
+
3520 return 0;
+
3521 m = stbi__get_marker(z);
+
3522 while (m == STBI__MARKER_none) {
+
3523 // some files have extra padding after their blocks, so ok, we'll scan
+
3524 if (stbi__at_eof(z->s))
+
3525 return stbi__err("no SOF", "Corrupt JPEG");
+
3526 m = stbi__get_marker(z);
+
3527 }
+
3528 }
+
3529 z->progressive = stbi__SOF_progressive(m);
+ +
3531 return 0;
+
3532 return 1;
+
3533}
+
3534
+ +
3536 // some JPEGs have junk at end, skip over it but if we find what looks
+
3537 // like a valid marker, resume there
+
3538 while (!stbi__at_eof(j->s)) {
+
3539 int x = stbi__get8(j->s);
+
3540 while (x == 255) { // might be a marker
+
3541 if (stbi__at_eof(j->s))
+
3542 return STBI__MARKER_none;
+
3543 x = stbi__get8(j->s);
+
3544 if (x != 0x00 && x != 0xff) {
+
3545 // not a stuffed zero or lead-in to another marker, looks
+
3546 // like an actual marker, return it
+
3547 return x;
+
3548 }
+
3549 // stuffed zero has x=0 now which ends the loop, meaning we go
+
3550 // back to regular scan loop.
+
3551 // repeated 0xff keeps trying to read the next byte of the marker.
+
3552 }
+
3553 }
+
3554 return STBI__MARKER_none;
+
3555}
+
3556
+
3557// decode image to YCbCr format
+
3558static int stbi__decode_jpeg_image(stbi__jpeg * j) {
+
3559 int m;
+
3560 for (m = 0; m < 4; m++) {
+
3561 j->img_comp[m].raw_data = NULL;
+
3562 j->img_comp[m].raw_coeff = NULL;
+
3563 }
+
3564 j->restart_interval = 0;
+ +
3566 return 0;
+
3567 m = stbi__get_marker(j);
+
3568 while (!stbi__EOI(m)) {
+
3569 if (stbi__SOS(m)) {
+ +
3571 return 0;
+ +
3573 return 0;
+
3574 if (j->marker == STBI__MARKER_none) {
+
3575 j->marker = stbi__skip_jpeg_junk_at_end(j);
+
3576 // if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0
+
3577 }
+
3578 m = stbi__get_marker(j);
+
3579 if (STBI__RESTART(m))
+
3580 m = stbi__get_marker(j);
+
3581 } else if (stbi__DNL(m)) {
+
3582 int Ld = stbi__get16be(j->s);
+ +
3584 if (Ld != 4)
+
3585 return stbi__err("bad DNL len", "Corrupt JPEG");
+
3586 if (NL != j->s->img_y)
+
3587 return stbi__err("bad DNL height", "Corrupt JPEG");
+
3588 m = stbi__get_marker(j);
+
3589 } else {
+
3590 if (!stbi__process_marker(j, m))
+
3591 return 1;
+
3592 m = stbi__get_marker(j);
+
3593 }
+
3594 }
+
3595 if (j->progressive)
+ +
3597 return 1;
+
3598}
+
3599
+
3600// static jfif-centered resampling (across block boundaries)
+
3601
+
3602typedef stbi_uc * (*resample_row_func)(stbi_uc * out, stbi_uc * in0, stbi_uc * in1, int w, int hs);
+
3603
+
3604#define stbi__div4(x) ((stbi_uc)((x) >> 2))
+
3605
+
3606static stbi_uc * resample_row_1(stbi_uc * out, stbi_uc * in_near, stbi_uc * in_far, int w, int hs) {
+ + +
3609 STBI_NOTUSED(w);
+ +
3611 return in_near;
+
3612}
+
3613
+ +
3615 // need to generate two samples vertically for every one in input
+
3616 int i;
+ +
3618 for (i = 0; i < w; ++i)
+
3619 out[i] = stbi__div4(3 * in_near[i] + in_far[i] + 2);
+
3620 return out;
+
3621}
+
3622
+ +
3624 // need to generate two samples horizontally for every one in input
+
3625 int i;
+
3626 stbi_uc * input = in_near;
+
3627
+
3628 if (w == 1) {
+
3629 // if only one sample, can't do any interpolation
+
3630 out[0] = out[1] = input[0];
+
3631 return out;
+
3632 }
+
3633
+
3634 out[0] = input[0];
+
3635 out[1] = stbi__div4(input[0] * 3 + input[1] + 2);
+
3636 for (i = 1; i < w - 1; ++i) {
+
3637 int n = 3 * input[i] + 2;
+
3638 out[i * 2 + 0] = stbi__div4(n + input[i - 1]);
+
3639 out[i * 2 + 1] = stbi__div4(n + input[i + 1]);
+
3640 }
+
3641 out[i * 2 + 0] = stbi__div4(input[w - 2] * 3 + input[w - 1] + 2);
+
3642 out[i * 2 + 1] = input[w - 1];
+
3643
+ + +
3646
+
3647 return out;
+
3648}
+
3649
+
3650#define stbi__div16(x) ((stbi_uc)((x) >> 4))
+
3651
+ +
3653 // need to generate 2x2 samples for every one in input
+
3654 int i, t0, t1;
+
3655 if (w == 1) {
+
3656 out[0] = out[1] = stbi__div4(3 * in_near[0] + in_far[0] + 2);
+
3657 return out;
+
3658 }
+
3659
+
3660 t1 = 3 * in_near[0] + in_far[0];
+
3661 out[0] = stbi__div4(t1 + 2);
+
3662 for (i = 1; i < w; ++i) {
+
3663 t0 = t1;
+
3664 t1 = 3 * in_near[i] + in_far[i];
+
3665 out[i * 2 - 1] = stbi__div16(3 * t0 + t1 + 8);
+
3666 out[i * 2] = stbi__div16(3 * t1 + t0 + 8);
+
3667 }
+
3668 out[w * 2 - 1] = stbi__div4(t1 + 2);
+
3669
+ +
3671
+
3672 return out;
+
3673}
+
3674
+
3675#if defined(STBI_SSE2) || defined(STBI_NEON)
+ +
3677 // need to generate 2x2 samples for every one in input
+
3678 int i = 0, t0, t1;
+
3679
+
3680 if (w == 1) {
+
3681 out[0] = out[1] = stbi__div4(3 * in_near[0] + in_far[0] + 2);
+
3682 return out;
+
3683 }
+
3684
+
3685 t1 = 3 * in_near[0] + in_far[0];
+
3686 // process groups of 8 pixels for as long as we can.
+
3687 // note we can't handle the last pixel in a row in this loop
+
3688 // because we need to handle the filter boundary conditions.
+
3689 for (; i < ((w - 1) & ~7); i += 8) {
+
3690#if defined(STBI_SSE2)
+
3691 // load and perform the vertical filtering pass
+
3692 // this uses 3*x + y = 4*x + (y - x)
+ + + + + + + +
3700 __m128i curr = _mm_add_epi16(nears, diff); // current row
+
3701
+
3702 // horizontal filter works the same based on shifted vers of current
+
3703 // row. "prev" is current row shifted right by 1 pixel; we need to
+
3704 // insert the previous pixel value (from t1).
+
3705 // "next" is current row shifted left by 1 pixel, with first pixel
+
3706 // of next block of 8 pixels added in.
+ + +
3709 __m128i prev = _mm_insert_epi16(prv0, t1, 0);
+
3710 __m128i next = _mm_insert_epi16(nxt0, 3 * in_near[i + 8] + in_far[i + 8], 7);
+
3711
+
3712 // horizontal filter, polyphase implementation since it's convenient:
+
3713 // even pixels = 3*cur + prev = cur*4 + (prev - cur)
+
3714 // odd pixels = 3*cur + next = cur*4 + (next - cur)
+
3715 // note the shared term.
+
3716 __m128i bias = _mm_set1_epi16(8);
+ +
3718 __m128i prvd = _mm_sub_epi16(prev, curr);
+
3719 __m128i nxtd = _mm_sub_epi16(next, curr);
+
3720 __m128i curb = _mm_add_epi16(curs, bias);
+ + +
3723
+
3724 // interleave even and odd pixels, then undo scaling.
+ + + + +
3729
+
3730 // pack and write output
+ +
3732 _mm_storeu_si128((__m128i *)(out + i * 2), outv);
+
3733#elif defined(STBI_NEON)
+
3734 // load and perform the vertical filtering pass
+
3735 // this uses 3*x + y = 4*x + (y - x)
+ + + + +
3740 int16x8_t curr = vaddq_s16(nears, diff); // current row
+
3741
+
3742 // horizontal filter works the same based on shifted vers of current
+
3743 // row. "prev" is current row shifted right by 1 pixel; we need to
+
3744 // insert the previous pixel value (from t1).
+
3745 // "next" is current row shifted left by 1 pixel, with first pixel
+
3746 // of next block of 8 pixels added in.
+ + +
3749 int16x8_t prev = vsetq_lane_s16(t1, prv0, 0);
+
3750 int16x8_t next = vsetq_lane_s16(3 * in_near[i + 8] + in_far[i + 8], nxt0, 7);
+
3751
+
3752 // horizontal filter, polyphase implementation since it's convenient:
+
3753 // even pixels = 3*cur + prev = cur*4 + (prev - cur)
+
3754 // odd pixels = 3*cur + next = cur*4 + (next - cur)
+
3755 // note the shared term.
+ +
3757 int16x8_t prvd = vsubq_s16(prev, curr);
+
3758 int16x8_t nxtd = vsubq_s16(next, curr);
+ + +
3761
+
3762 // undo scaling and round, then store with even/odd phases interleaved
+
3763 uint8x8x2_t o;
+
3764 o.val[0] = vqrshrun_n_s16(even, 4);
+
3765 o.val[1] = vqrshrun_n_s16(odd, 4);
+
3766 vst2_u8(out + i * 2, o);
+
3767#endif
+
3768
+
3769 // "previous" value for next iter
+
3770 t1 = 3 * in_near[i + 7] + in_far[i + 7];
+
3771 }
+
3772
+
3773 t0 = t1;
+
3774 t1 = 3 * in_near[i] + in_far[i];
+
3775 out[i * 2] = stbi__div16(3 * t1 + t0 + 8);
+
3776
+
3777 for (++i; i < w; ++i) {
+
3778 t0 = t1;
+
3779 t1 = 3 * in_near[i] + in_far[i];
+
3780 out[i * 2 - 1] = stbi__div16(3 * t0 + t1 + 8);
+
3781 out[i * 2] = stbi__div16(3 * t1 + t0 + 8);
+
3782 }
+
3783 out[w * 2 - 1] = stbi__div4(t1 + 2);
+
3784
+ +
3786
+
3787 return out;
+
3788}
+
3789#endif
+
3790
+ +
3792 // resample with nearest-neighbor
+
3793 int i, j;
+ +
3795 for (i = 0; i < w; ++i)
+
3796 for (j = 0; j < hs; ++j)
+
3797 out[i * hs + j] = in_near[i];
+
3798 return out;
+
3799}
+
3800
+
3801// this is a reduced-precision calculation of YCbCr-to-RGB introduced
+
3802// to make sure the code produces the same results in both SIMD and scalar
+
3803#define stbi__float2fixed(x) (((int)((x)*4096.0f + 0.5f)) << 8)
+
3804static void stbi__YCbCr_to_RGB_row(stbi_uc * out, const stbi_uc * y, const stbi_uc * pcb, const stbi_uc * pcr, int count,
+
3805 int step) {
+
3806 int i;
+
3807 for (i = 0; i < count; ++i) {
+
3808 int y_fixed = (y[i] << 20) + (1 << 19); // rounding
+
3809 int r, g, b;
+
3810 int cr = pcr[i] - 128;
+
3811 int cb = pcb[i] - 128;
+
3812 r = y_fixed + cr * stbi__float2fixed(1.40200f);
+
3813 g = y_fixed + (cr * -stbi__float2fixed(0.71414f)) + ((cb * -stbi__float2fixed(0.34414f)) & 0xffff0000);
+
3814 b = y_fixed + cb * stbi__float2fixed(1.77200f);
+
3815 r >>= 20;
+
3816 g >>= 20;
+
3817 b >>= 20;
+
3818 if ((unsigned)r > 255) {
+
3819 if (r < 0)
+
3820 r = 0;
+
3821 else
+
3822 r = 255;
+
3823 }
+
3824 if ((unsigned)g > 255) {
+
3825 if (g < 0)
+
3826 g = 0;
+
3827 else
+
3828 g = 255;
+
3829 }
+
3830 if ((unsigned)b > 255) {
+
3831 if (b < 0)
+
3832 b = 0;
+
3833 else
+
3834 b = 255;
+
3835 }
+
3836 out[0] = (stbi_uc)r;
+
3837 out[1] = (stbi_uc)g;
+
3838 out[2] = (stbi_uc)b;
+
3839 out[3] = 255;
+
3840 out += step;
+
3841 }
+
3842}
+
3843
+
3844#if defined(STBI_SSE2) || defined(STBI_NEON)
+
3845static void stbi__YCbCr_to_RGB_simd(stbi_uc * out, stbi_uc const * y, stbi_uc const * pcb, stbi_uc const * pcr, int count,
+
3846 int step) {
+
3847 int i = 0;
+
3848
+
3849#ifdef STBI_SSE2
+
3850 // step == 3 is pretty ugly on the final interleave, and i'm not convinced
+
3851 // it's useful in practice (you wouldn't use it for textures, for example).
+
3852 // so just accelerate step == 4 case.
+
3853 if (step == 4) {
+
3854 // this is a fairly straightforward implementation and not super-optimized.
+ +
3856 __m128i cr_const0 = _mm_set1_epi16((short)(1.40200f * 4096.0f + 0.5f));
+
3857 __m128i cr_const1 = _mm_set1_epi16(-(short)(0.71414f * 4096.0f + 0.5f));
+
3858 __m128i cb_const0 = _mm_set1_epi16(-(short)(0.34414f * 4096.0f + 0.5f));
+
3859 __m128i cb_const1 = _mm_set1_epi16((short)(1.77200f * 4096.0f + 0.5f));
+
3860 __m128i y_bias = _mm_set1_epi8((char)(unsigned char)128);
+
3861 __m128i xw = _mm_set1_epi16(255); // alpha channel
+
3862
+
3863 for (; i + 7 < count; i += 8) {
+
3864 // load
+ + + + + +
3870
+
3871 // unpack to short (and left-shift cr, cb by 8)
+ + + +
3875
+
3876 // color transform
+ + + + + + + + + +
3886
+
3887 // descale
+ + + +
3891
+
3892 // back to byte, set up for transpose
+ + +
3895
+
3896 // transpose to interleave channels
+ + + + +
3901
+
3902 // store
+
3903 _mm_storeu_si128((__m128i *)(out + 0), o0);
+
3904 _mm_storeu_si128((__m128i *)(out + 16), o1);
+
3905 out += 32;
+
3906 }
+
3907 }
+
3908#endif
+
3909
+
3910#ifdef STBI_NEON
+
3911 // in this version, step=3 support would be easy to add. but is there demand?
+
3912 if (step == 4) {
+
3913 // this is a fairly straightforward implementation and not super-optimized.
+ +
3915 int16x8_t cr_const0 = vdupq_n_s16((short)(1.40200f * 4096.0f + 0.5f));
+
3916 int16x8_t cr_const1 = vdupq_n_s16(-(short)(0.71414f * 4096.0f + 0.5f));
+
3917 int16x8_t cb_const0 = vdupq_n_s16(-(short)(0.34414f * 4096.0f + 0.5f));
+
3918 int16x8_t cb_const1 = vdupq_n_s16((short)(1.77200f * 4096.0f + 0.5f));
+
3919
+
3920 for (; i + 7 < count; i += 8) {
+
3921 // load
+ + + + + +
3927
+
3928 // expand to s16
+ + + +
3932
+
3933 // color transform
+ + + + + + + +
3941
+
3942 // undo scaling, round, convert to byte
+
3943 uint8x8x4_t o;
+
3944 o.val[0] = vqrshrun_n_s16(rws, 4);
+
3945 o.val[1] = vqrshrun_n_s16(gws, 4);
+
3946 o.val[2] = vqrshrun_n_s16(bws, 4);
+
3947 o.val[3] = vdup_n_u8(255);
+
3948
+
3949 // store, interleaving r/g/b/a
+
3950 vst4_u8(out, o);
+
3951 out += 8 * 4;
+
3952 }
+
3953 }
+
3954#endif
+
3955
+
3956 for (; i < count; ++i) {
+
3957 int y_fixed = (y[i] << 20) + (1 << 19); // rounding
+
3958 int r, g, b;
+
3959 int cr = pcr[i] - 128;
+
3960 int cb = pcb[i] - 128;
+
3961 r = y_fixed + cr * stbi__float2fixed(1.40200f);
+
3962 g = y_fixed + cr * -stbi__float2fixed(0.71414f) + ((cb * -stbi__float2fixed(0.34414f)) & 0xffff0000);
+
3963 b = y_fixed + cb * stbi__float2fixed(1.77200f);
+
3964 r >>= 20;
+
3965 g >>= 20;
+
3966 b >>= 20;
+
3967 if ((unsigned)r > 255) {
+
3968 if (r < 0)
+
3969 r = 0;
+
3970 else
+
3971 r = 255;
+
3972 }
+
3973 if ((unsigned)g > 255) {
+
3974 if (g < 0)
+
3975 g = 0;
+
3976 else
+
3977 g = 255;
+
3978 }
+
3979 if ((unsigned)b > 255) {
+
3980 if (b < 0)
+
3981 b = 0;
+
3982 else
+
3983 b = 255;
+
3984 }
+
3985 out[0] = (stbi_uc)r;
+
3986 out[1] = (stbi_uc)g;
+
3987 out[2] = (stbi_uc)b;
+
3988 out[3] = 255;
+
3989 out += step;
+
3990 }
+
3991}
+
3992#endif
+
3993
+
3994// set up the kernels
+
3995static void stbi__setup_jpeg(stbi__jpeg * j) {
+
3996 j->idct_block_kernel = stbi__idct_block;
+
3997 j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row;
+
3998 j->resample_row_hv_2_kernel = stbi__resample_row_hv_2;
+
3999
+
4000#ifdef STBI_SSE2
+
4001 if (stbi__sse2_available()) {
+
4002 j->idct_block_kernel = stbi__idct_simd;
+
4003 j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
+
4004 j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
+
4005 }
+
4006#endif
+
4007
+
4008#ifdef STBI_NEON
+
4009 j->idct_block_kernel = stbi__idct_simd;
+
4010 j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
+
4011 j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
+
4012#endif
+
4013}
+
4014
+
4015// clean up the temporary component buffers
+
4016static void stbi__cleanup_jpeg(stbi__jpeg * j) { stbi__free_jpeg_components(j, j->s->img_n, 0); }
+
4017
+
4018typedef struct {
+ +
4020 stbi_uc *line0, *line1;
+
4021 int hs, vs; // expansion factor in each axis
+
4022 int w_lores; // horizontal pixels pre-expansion
+
4023 int ystep; // how far through vertical expansion we are
+
4024 int ypos; // which pre-expansion row we're on
+ +
4026
+
4027// fast 0..255 * 0..255 => 0..255 rounded multiplication
+ +
4029 unsigned int t = x * y + 128;
+
4030 return (stbi_uc)((t + (t >> 8)) >> 8);
+
4031}
+
4032
+
4033static stbi_uc * load_jpeg_image(stbi__jpeg * z, int * out_x, int * out_y, int * comp, int req_comp) {
+
4034 int n, decode_n, is_rgb;
+
4035 z->s->img_n = 0; // make stbi__cleanup_jpeg safe
+
4036
+
4037 // validate req_comp
+ +
4039 return stbi__errpuc("bad req_comp", "Internal error");
+
4040
+
4041 // load a jpeg image from whichever source, but leave in YCbCr format
+
4042 if (!stbi__decode_jpeg_image(z)) {
+ +
4044 return NULL;
+
4045 }
+
4046
+
4047 // determine actual number of components to generate
+
4048 n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1;
+
4049
+
4050 is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif));
+
4051
+
4052 if (z->s->img_n == 3 && n < 3 && !is_rgb)
+
4053 decode_n = 1;
+
4054 else
+
4055 decode_n = z->s->img_n;
+
4056
+
4057 // nothing to do if no components requested; check this now to avoid
+
4058 // accessing uninitialized coutput[0] later
+
4059 if (decode_n <= 0) {
+ +
4061 return NULL;
+
4062 }
+
4063
+
4064 // resample and color-convert
+
4065 {
+
4066 int k;
+
4067 unsigned int i, j;
+
4068 stbi_uc * output;
+
4069 stbi_uc * coutput[4] = {NULL, NULL, NULL, NULL};
+
4070
+ +
4072
+
4073 for (k = 0; k < decode_n; ++k) {
+
4074 stbi__resample * r = &res_comp[k];
+
4075
+
4076 // allocate line buffer big enough for upsampling off the edges
+
4077 // with upsample factor of 4
+
4078 z->img_comp[k].linebuf = (stbi_uc *)stbi__malloc(z->s->img_x + 3);
+
4079 if (!z->img_comp[k].linebuf) {
+ +
4081 return stbi__errpuc("outofmem", "Out of memory");
+
4082 }
+
4083
+
4084 r->hs = z->img_h_max / z->img_comp[k].h;
+
4085 r->vs = z->img_v_max / z->img_comp[k].v;
+
4086 r->ystep = r->vs >> 1;
+
4087 r->w_lores = (z->s->img_x + r->hs - 1) / r->hs;
+
4088 r->ypos = 0;
+
4089 r->line0 = r->line1 = z->img_comp[k].data;
+
4090
+
4091 if (r->hs == 1 && r->vs == 1)
+
4092 r->resample = resample_row_1;
+
4093 else if (r->hs == 1 && r->vs == 2)
+
4094 r->resample = stbi__resample_row_v_2;
+
4095 else if (r->hs == 2 && r->vs == 1)
+
4096 r->resample = stbi__resample_row_h_2;
+
4097 else if (r->hs == 2 && r->vs == 2)
+
4098 r->resample = z->resample_row_hv_2_kernel;
+
4099 else
+
4100 r->resample = stbi__resample_row_generic;
+
4101 }
+
4102
+
4103 // can't error after this so, this is safe
+
4104 output = (stbi_uc *)stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1);
+
4105 if (!output) {
+ +
4107 return stbi__errpuc("outofmem", "Out of memory");
+
4108 }
+
4109
+
4110 // now go ahead and resample
+
4111 for (j = 0; j < z->s->img_y; ++j) {
+
4112 stbi_uc * out = output + n * z->s->img_x * j;
+
4113 for (k = 0; k < decode_n; ++k) {
+
4114 stbi__resample * r = &res_comp[k];
+
4115 int y_bot = r->ystep >= (r->vs >> 1);
+
4116 coutput[k] = r->resample(z->img_comp[k].linebuf, y_bot ? r->line1 : r->line0, y_bot ? r->line0 : r->line1,
+
4117 r->w_lores, r->hs);
+
4118 if (++r->ystep >= r->vs) {
+
4119 r->ystep = 0;
+
4120 r->line0 = r->line1;
+
4121 if (++r->ypos < z->img_comp[k].y)
+
4122 r->line1 += z->img_comp[k].w2;
+
4123 }
+
4124 }
+
4125 if (n >= 3) {
+
4126 stbi_uc * y = coutput[0];
+
4127 if (z->s->img_n == 3) {
+
4128 if (is_rgb) {
+
4129 for (i = 0; i < z->s->img_x; ++i) {
+
4130 out[0] = y[i];
+
4131 out[1] = coutput[1][i];
+
4132 out[2] = coutput[2][i];
+
4133 out[3] = 255;
+
4134 out += n;
+
4135 }
+
4136 } else {
+
4137 z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
+
4138 }
+
4139 } else if (z->s->img_n == 4) {
+
4140 if (z->app14_color_transform == 0) { // CMYK
+
4141 for (i = 0; i < z->s->img_x; ++i) {
+
4142 stbi_uc m = coutput[3][i];
+
4143 out[0] = stbi__blinn_8x8(coutput[0][i], m);
+
4144 out[1] = stbi__blinn_8x8(coutput[1][i], m);
+
4145 out[2] = stbi__blinn_8x8(coutput[2][i], m);
+
4146 out[3] = 255;
+
4147 out += n;
+
4148 }
+
4149 } else if (z->app14_color_transform == 2) { // YCCK
+
4150 z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
+
4151 for (i = 0; i < z->s->img_x; ++i) {
+
4152 stbi_uc m = coutput[3][i];
+
4153 out[0] = stbi__blinn_8x8(255 - out[0], m);
+
4154 out[1] = stbi__blinn_8x8(255 - out[1], m);
+
4155 out[2] = stbi__blinn_8x8(255 - out[2], m);
+
4156 out += n;
+
4157 }
+
4158 } else { // YCbCr + alpha? Ignore the fourth channel for now
+
4159 z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
+
4160 }
+
4161 } else
+
4162 for (i = 0; i < z->s->img_x; ++i) {
+
4163 out[0] = out[1] = out[2] = y[i];
+
4164 out[3] = 255; // not used if n==3
+
4165 out += n;
+
4166 }
+
4167 } else {
+
4168 if (is_rgb) {
+
4169 if (n == 1)
+
4170 for (i = 0; i < z->s->img_x; ++i)
+
4171 *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
+
4172 else {
+
4173 for (i = 0; i < z->s->img_x; ++i, out += 2) {
+
4174 out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
+
4175 out[1] = 255;
+
4176 }
+
4177 }
+
4178 } else if (z->s->img_n == 4 && z->app14_color_transform == 0) {
+
4179 for (i = 0; i < z->s->img_x; ++i) {
+
4180 stbi_uc m = coutput[3][i];
+
4181 stbi_uc r = stbi__blinn_8x8(coutput[0][i], m);
+
4182 stbi_uc g = stbi__blinn_8x8(coutput[1][i], m);
+
4183 stbi_uc b = stbi__blinn_8x8(coutput[2][i], m);
+
4184 out[0] = stbi__compute_y(r, g, b);
+
4185 out[1] = 255;
+
4186 out += n;
+
4187 }
+
4188 } else if (z->s->img_n == 4 && z->app14_color_transform == 2) {
+
4189 for (i = 0; i < z->s->img_x; ++i) {
+
4190 out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]);
+
4191 out[1] = 255;
+
4192 out += n;
+
4193 }
+
4194 } else {
+
4195 stbi_uc * y = coutput[0];
+
4196 if (n == 1)
+
4197 for (i = 0; i < z->s->img_x; ++i)
+
4198 out[i] = y[i];
+
4199 else
+
4200 for (i = 0; i < z->s->img_x; ++i) {
+
4201 *out++ = y[i];
+
4202 *out++ = 255;
+
4203 }
+
4204 }
+
4205 }
+
4206 }
+ +
4208 *out_x = z->s->img_x;
+
4209 *out_y = z->s->img_y;
+
4210 if (comp)
+
4211 *comp = z->s->img_n >= 3 ? 3 : 1; // report original components, not output
+
4212 return output;
+
4213 }
+
4214}
+
4215
+
4216static void * stbi__jpeg_load(stbi__context * s, int * x, int * y, int * comp, int req_comp, stbi__result_info * ri) {
+
4217 unsigned char * result;
+ +
4219 if (!j)
+
4220 return stbi__errpuc("outofmem", "Out of memory");
+
4221 memset(j, 0, sizeof(stbi__jpeg));
+ +
4223 j->s = s;
+ + +
4226 STBI_FREE(j);
+
4227 return result;
+
4228}
+
4229
+
4230static int stbi__jpeg_test(stbi__context * s) {
+
4231 int r;
+ +
4233 if (!j)
+
4234 return stbi__err("outofmem", "Out of memory");
+
4235 memset(j, 0, sizeof(stbi__jpeg));
+
4236 j->s = s;
+ + +
4239 stbi__rewind(s);
+
4240 STBI_FREE(j);
+
4241 return r;
+
4242}
+
4243
+
4244static int stbi__jpeg_info_raw(stbi__jpeg * j, int * x, int * y, int * comp) {
+ +
4246 stbi__rewind(j->s);
+
4247 return 0;
+
4248 }
+
4249 if (x)
+
4250 *x = j->s->img_x;
+
4251 if (y)
+
4252 *y = j->s->img_y;
+
4253 if (comp)
+
4254 *comp = j->s->img_n >= 3 ? 3 : 1;
+
4255 return 1;
+
4256}
+
4257
+
4258static int stbi__jpeg_info(stbi__context * s, int * x, int * y, int * comp) {
+
4259 int result;
+
4260 stbi__jpeg * j = (stbi__jpeg *)(stbi__malloc(sizeof(stbi__jpeg)));
+
4261 if (!j)
+
4262 return stbi__err("outofmem", "Out of memory");
+
4263 memset(j, 0, sizeof(stbi__jpeg));
+
4264 j->s = s;
+ +
4266 STBI_FREE(j);
+
4267 return result;
+
4268}
+
4269#endif
+
4270
+
4271// public domain zlib decode v0.2 Sean Barrett 2006-11-18
+
4272// simple implementation
+
4273// - all input must be provided in an upfront buffer
+
4274// - all output is written to a single output buffer (can malloc/realloc)
+
4275// performance
+
4276// - fast huffman
+
4277
+
4278#ifndef STBI_NO_ZLIB
+
4279
+
4280// fast-way is faster to check than jpeg huffman, but slow way is slower
+
4281#define STBI__ZFAST_BITS 9 // accelerate all cases in default tables
+
4282#define STBI__ZFAST_MASK ((1 << STBI__ZFAST_BITS) - 1)
+
4283#define STBI__ZNSYMS 288 // number of symbols in literal/length alphabet
+
4284
+
4285// zlib-style huffman encoding
+
4286// (jpegs packs from left, zlib from right, so can't share code)
+
4287typedef struct {
+ + +
4290 int maxcode[17];
+ +
4292 stbi_uc size[STBI__ZNSYMS];
+ + +
4295
+
4296stbi_inline static int stbi__bitreverse16(int n) {
+
4297 n = ((n & 0xAAAA) >> 1) | ((n & 0x5555) << 1);
+
4298 n = ((n & 0xCCCC) >> 2) | ((n & 0x3333) << 2);
+
4299 n = ((n & 0xF0F0) >> 4) | ((n & 0x0F0F) << 4);
+
4300 n = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8);
+
4301 return n;
+
4302}
+
4303
+
4304stbi_inline static int stbi__bit_reverse(int v, int bits) {
+
4305 STBI_ASSERT(bits <= 16);
+
4306 // to bit reverse n bits, reverse 16 and shift
+
4307 // e.g. 11 bits, bit reverse and shift away 5
+
4308 return stbi__bitreverse16(v) >> (16 - bits);
+
4309}
+
4310
+
4311static int stbi__zbuild_huffman(stbi__zhuffman * z, const stbi_uc * sizelist, int num) {
+
4312 int i, k = 0;
+
4313 int code, next_code[16], sizes[17];
+
4314
+
4315 // DEFLATE spec for generating codes
+
4316 memset(sizes, 0, sizeof(sizes));
+
4317 memset(z->fast, 0, sizeof(z->fast));
+
4318 for (i = 0; i < num; ++i)
+
4319 ++sizes[sizelist[i]];
+
4320 sizes[0] = 0;
+
4321 for (i = 1; i < 16; ++i)
+
4322 if (sizes[i] > (1 << i))
+
4323 return stbi__err("bad sizes", "Corrupt PNG");
+
4324 code = 0;
+
4325 for (i = 1; i < 16; ++i) {
+
4326 next_code[i] = code;
+
4327 z->firstcode[i] = (stbi__uint16)code;
+
4328 z->firstsymbol[i] = (stbi__uint16)k;
+
4329 code = (code + sizes[i]);
+
4330 if (sizes[i])
+
4331 if (code - 1 >= (1 << i))
+
4332 return stbi__err("bad codelengths", "Corrupt PNG");
+
4333 z->maxcode[i] = code << (16 - i); // preshift for inner loop
+
4334 code <<= 1;
+
4335 k += sizes[i];
+
4336 }
+
4337 z->maxcode[16] = 0x10000; // sentinel
+
4338 for (i = 0; i < num; ++i) {
+
4339 int s = sizelist[i];
+
4340 if (s) {
+
4341 int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s];
+
4342 stbi__uint16 fastv = (stbi__uint16)((s << 9) | i);
+
4343 z->size[c] = (stbi_uc)s;
+
4344 z->value[c] = (stbi__uint16)i;
+
4345 if (s <= STBI__ZFAST_BITS) {
+
4346 int j = stbi__bit_reverse(next_code[s], s);
+
4347 while (j < (1 << STBI__ZFAST_BITS)) {
+
4348 z->fast[j] = fastv;
+
4349 j += (1 << s);
+
4350 }
+
4351 }
+
4352 ++next_code[s];
+
4353 }
+
4354 }
+
4355 return 1;
+
4356}
+
4357
+
4358// zlib-from-memory implementation for PNG reading
+
4359// because PNG allows splitting the zlib stream arbitrarily,
+
4360// and it's annoying structurally to have PNG call ZLIB call PNG,
+
4361// we require PNG read all the IDATs and combine them into a single
+
4362// memory buffer
+
4363
+
4364typedef struct {
+ +
4366 int num_bits;
+ +
4368
+
4369 char * zout;
+
4370 char * zout_start;
+
4371 char * zout_end;
+
4372 int z_expandable;
+
4373
+ +
4375} stbi__zbuf;
+
4376
+
4377stbi_inline static int stbi__zeof(stbi__zbuf * z) { return (z->zbuffer >= z->zbuffer_end); }
+
4378
+
4379stbi_inline static stbi_uc stbi__zget8(stbi__zbuf * z) { return stbi__zeof(z) ? 0 : *z->zbuffer++; }
+
4380
+
4381static void stbi__fill_bits(stbi__zbuf * z) {
+
4382 do {
+
4383 if (z->code_buffer >= (1U << z->num_bits)) {
+
4384 z->zbuffer = z->zbuffer_end; /* treat this as EOF so we fail. */
+
4385 return;
+
4386 }
+
4387 z->code_buffer |= (unsigned int)stbi__zget8(z) << z->num_bits;
+
4388 z->num_bits += 8;
+
4389 } while (z->num_bits <= 24);
+
4390}
+
4391
+
4392stbi_inline static unsigned int stbi__zreceive(stbi__zbuf * z, int n) {
+
4393 unsigned int k;
+
4394 if (z->num_bits < n)
+ +
4396 k = z->code_buffer & ((1 << n) - 1);
+
4397 z->code_buffer >>= n;
+
4398 z->num_bits -= n;
+
4399 return k;
+
4400}
+
4401
+ +
4403 int b, s, k;
+
4404 // not resolved by fast table, so compute it the slow way
+
4405 // use jpeg approach, which requires MSbits at top
+
4406 k = stbi__bit_reverse(a->code_buffer, 16);
+
4407 for (s = STBI__ZFAST_BITS + 1;; ++s)
+
4408 if (k < z->maxcode[s])
+
4409 break;
+
4410 if (s >= 16)
+
4411 return -1; // invalid code!
+
4412 // code size is s, so:
+
4413 b = (k >> (16 - s)) - z->firstcode[s] + z->firstsymbol[s];
+
4414 if (b >= STBI__ZNSYMS)
+
4415 return -1; // some data was corrupt somewhere!
+
4416 if (z->size[b] != s)
+
4417 return -1; // was originally an assert, but report failure instead.
+
4418 a->code_buffer >>= s;
+
4419 a->num_bits -= s;
+
4420 return z->value[b];
+
4421}
+
4422
+ +
4424 int b, s;
+
4425 if (a->num_bits < 16) {
+
4426 if (stbi__zeof(a)) {
+
4427 return -1; /* report error for unexpected end of data. */
+
4428 }
+ +
4430 }
+
4431 b = z->fast[a->code_buffer & STBI__ZFAST_MASK];
+
4432 if (b) {
+
4433 s = b >> 9;
+
4434 a->code_buffer >>= s;
+
4435 a->num_bits -= s;
+
4436 return b & 511;
+
4437 }
+ +
4439}
+
4440
+
4441static int stbi__zexpand(stbi__zbuf * z, char * zout, int n) // need to make room for n bytes
+
4442{
+
4443 char * q;
+
4444 unsigned int cur, limit, old_limit;
+
4445 z->zout = zout;
+
4446 if (!z->z_expandable)
+
4447 return stbi__err("output buffer limit", "Corrupt PNG");
+
4448 cur = (unsigned int)(z->zout - z->zout_start);
+
4449 limit = old_limit = (unsigned)(z->zout_end - z->zout_start);
+
4450 if (UINT_MAX - cur < (unsigned)n)
+
4451 return stbi__err("outofmem", "Out of memory");
+
4452 while (cur + n > limit) {
+
4453 if (limit > UINT_MAX / 2)
+
4454 return stbi__err("outofmem", "Out of memory");
+
4455 limit *= 2;
+
4456 }
+
4457 q = (char *)STBI_REALLOC_SIZED(z->zout_start, old_limit, limit);
+ +
4459 if (q == NULL)
+
4460 return stbi__err("outofmem", "Out of memory");
+
4461 z->zout_start = q;
+
4462 z->zout = q + cur;
+
4463 z->zout_end = q + limit;
+
4464 return 1;
+
4465}
+
4466
+
4467static const int stbi__zlength_base[31] = {3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
+
4468 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
+
4469
+
4470static const int stbi__zlength_extra[31] = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
+
4471 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 0, 0};
+
4472
+
4473static const int stbi__zdist_base[32] = {1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33,
+
4474 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537,
+
4475 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 0, 0};
+
4476
+
4477static const int stbi__zdist_extra[32] = {0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6,
+
4478 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13};
+
4479
+ +
4481 char * zout = a->zout;
+
4482 for (;;) {
+
4483 int z = stbi__zhuffman_decode(a, &a->z_length);
+
4484 if (z < 256) {
+
4485 if (z < 0)
+
4486 return stbi__err("bad huffman code", "Corrupt PNG"); // error in huffman codes
+
4487 if (zout >= a->zout_end) {
+
4488 if (!stbi__zexpand(a, zout, 1))
+
4489 return 0;
+
4490 zout = a->zout;
+
4491 }
+
4492 *zout++ = (char)z;
+
4493 } else {
+
4494 stbi_uc * p;
+
4495 int len, dist;
+
4496 if (z == 256) {
+
4497 a->zout = zout;
+
4498 return 1;
+
4499 }
+
4500 if (z >= 286)
+
4501 return stbi__err("bad huffman code",
+
4502 "Corrupt PNG"); // per DEFLATE, length codes 286 and 287 must not appear in compressed data
+
4503 z -= 257;
+ + + +
4507 z = stbi__zhuffman_decode(a, &a->z_distance);
+
4508 if (z < 0 || z >= 30)
+
4509 return stbi__err("bad huffman code",
+
4510 "Corrupt PNG"); // per DEFLATE, distance codes 30 and 31 must not appear in compressed data
+ +
4512 if (stbi__zdist_extra[z])
+ +
4514 if (zout - a->zout_start < dist)
+
4515 return stbi__err("bad dist", "Corrupt PNG");
+
4516 if (zout + len > a->zout_end) {
+
4517 if (!stbi__zexpand(a, zout, len))
+
4518 return 0;
+
4519 zout = a->zout;
+
4520 }
+
4521 p = (stbi_uc *)(zout - dist);
+
4522 if (dist == 1) { // run of one byte; common in images.
+
4523 stbi_uc v = *p;
+
4524 if (len) {
+
4525 do
+
4526 *zout++ = v;
+
4527 while (--len);
+
4528 }
+
4529 } else {
+
4530 if (len) {
+
4531 do
+
4532 *zout++ = *p++;
+
4533 while (--len);
+
4534 }
+
4535 }
+
4536 }
+
4537 }
+
4538}
+
4539
+ +
4541 static const stbi_uc length_dezigzag[19] = {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
+ +
4543 stbi_uc lencodes[286 + 32 + 137]; // padding for maximum single op
+ +
4545 int i, n;
+
4546
+
4547 int hlit = stbi__zreceive(a, 5) + 257;
+
4548 int hdist = stbi__zreceive(a, 5) + 1;
+
4549 int hclen = stbi__zreceive(a, 4) + 4;
+
4550 int ntot = hlit + hdist;
+
4551
+ +
4553 for (i = 0; i < hclen; ++i) {
+
4554 int s = stbi__zreceive(a, 3);
+ +
4556 }
+ +
4558 return 0;
+
4559
+
4560 n = 0;
+
4561 while (n < ntot) {
+ +
4563 if (c < 0 || c >= 19)
+
4564 return stbi__err("bad codelengths", "Corrupt PNG");
+
4565 if (c < 16)
+
4566 lencodes[n++] = (stbi_uc)c;
+
4567 else {
+
4568 stbi_uc fill = 0;
+
4569 if (c == 16) {
+
4570 c = stbi__zreceive(a, 2) + 3;
+
4571 if (n == 0)
+
4572 return stbi__err("bad codelengths", "Corrupt PNG");
+
4573 fill = lencodes[n - 1];
+
4574 } else if (c == 17) {
+
4575 c = stbi__zreceive(a, 3) + 3;
+
4576 } else if (c == 18) {
+
4577 c = stbi__zreceive(a, 7) + 11;
+
4578 } else {
+
4579 return stbi__err("bad codelengths", "Corrupt PNG");
+
4580 }
+
4581 if (ntot - n < c)
+
4582 return stbi__err("bad codelengths", "Corrupt PNG");
+
4583 memset(lencodes + n, fill, c);
+
4584 n += c;
+
4585 }
+
4586 }
+
4587 if (n != ntot)
+
4588 return stbi__err("bad codelengths", "Corrupt PNG");
+
4589 if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit))
+
4590 return 0;
+
4591 if (!stbi__zbuild_huffman(&a->z_distance, lencodes + hlit, hdist))
+
4592 return 0;
+
4593 return 1;
+
4594}
+
4595
+ +
4597 stbi_uc header[4];
+
4598 int len, nlen, k;
+
4599 if (a->num_bits & 7)
+
4600 stbi__zreceive(a, a->num_bits & 7); // discard
+
4601 // drain the bit-packed data into header
+
4602 k = 0;
+
4603 while (a->num_bits > 0) {
+
4604 header[k++] = (stbi_uc)(a->code_buffer & 255); // suppress MSVC run-time check
+
4605 a->code_buffer >>= 8;
+
4606 a->num_bits -= 8;
+
4607 }
+
4608 if (a->num_bits < 0)
+
4609 return stbi__err("zlib corrupt", "Corrupt PNG");
+
4610 // now fill header the normal way
+
4611 while (k < 4)
+
4612 header[k++] = stbi__zget8(a);
+
4613 len = header[1] * 256 + header[0];
+
4614 nlen = header[3] * 256 + header[2];
+
4615 if (nlen != (len ^ 0xffff))
+
4616 return stbi__err("zlib corrupt", "Corrupt PNG");
+
4617 if (a->zbuffer + len > a->zbuffer_end)
+
4618 return stbi__err("read past buffer", "Corrupt PNG");
+
4619 if (a->zout + len > a->zout_end)
+
4620 if (!stbi__zexpand(a, a->zout, len))
+
4621 return 0;
+
4622 memcpy(a->zout, a->zbuffer, len);
+
4623 a->zbuffer += len;
+
4624 a->zout += len;
+
4625 return 1;
+
4626}
+
4627
+
4628static int stbi__parse_zlib_header(stbi__zbuf * a) {
+
4629 int cmf = stbi__zget8(a);
+
4630 int cm = cmf & 15;
+
4631 /* int cinfo = cmf >> 4; */
+
4632 int flg = stbi__zget8(a);
+
4633 if (stbi__zeof(a))
+
4634 return stbi__err("bad zlib header", "Corrupt PNG"); // zlib spec
+
4635 if ((cmf * 256 + flg) % 31 != 0)
+
4636 return stbi__err("bad zlib header", "Corrupt PNG"); // zlib spec
+
4637 if (flg & 32)
+
4638 return stbi__err("no preset dict", "Corrupt PNG"); // preset dictionary not allowed in png
+
4639 if (cm != 8)
+
4640 return stbi__err("bad compression", "Corrupt PNG"); // DEFLATE required for png
+
4641 // window = 1 << (8 + cinfo)... but who cares, we fully buffer output
+
4642 return 1;
+
4643}
+
4644
+ +
4646 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+
4647 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+
4648 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+
4649 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+
4650 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+
4651 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+
4652 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+
4653 9, 9, 9, 9, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8};
+
4654static const stbi_uc stbi__zdefault_distance[32] = {5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+
4655 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5};
+
4656/*
+
4657Init algorithm:
+
4658{
+
4659 int i; // use <= to match clearly with spec
+
4660 for (i=0; i <= 143; ++i) stbi__zdefault_length[i] = 8;
+
4661 for ( ; i <= 255; ++i) stbi__zdefault_length[i] = 9;
+
4662 for ( ; i <= 279; ++i) stbi__zdefault_length[i] = 7;
+
4663 for ( ; i <= 287; ++i) stbi__zdefault_length[i] = 8;
+
4664
+
4665 for (i=0; i <= 31; ++i) stbi__zdefault_distance[i] = 5;
+
4666}
+
4667*/
+
4668
+
4669static int stbi__parse_zlib(stbi__zbuf * a, int parse_header) {
+
4670 int final, type;
+
4671 if (parse_header)
+ +
4673 return 0;
+
4674 a->num_bits = 0;
+
4675 a->code_buffer = 0;
+
4676 do {
+
4677 final = stbi__zreceive(a, 1);
+
4678 type = stbi__zreceive(a, 2);
+
4679 if (type == 0) {
+ +
4681 return 0;
+
4682 } else if (type == 3) {
+
4683 return 0;
+
4684 } else {
+
4685 if (type == 1) {
+
4686 // use fixed code lengths
+ +
4688 return 0;
+
4689 if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance, 32))
+
4690 return 0;
+
4691 } else {
+ +
4693 return 0;
+
4694 }
+ +
4696 return 0;
+
4697 }
+
4698 } while (!final);
+
4699 return 1;
+
4700}
+
4701
+
4702static int stbi__do_zlib(stbi__zbuf * a, char * obuf, int olen, int exp, int parse_header) {
+
4703 a->zout_start = obuf;
+
4704 a->zout = obuf;
+
4705 a->zout_end = obuf + olen;
+
4706 a->z_expandable = exp;
+
4707
+ +
4709}
+
4710
+
4711STBIDEF char * stbi_zlib_decode_malloc_guesssize(const char * buffer, int len, int initial_size, int * outlen) {
+
4712 stbi__zbuf a;
+
4713 char * p = (char *)stbi__malloc(initial_size);
+
4714 if (p == NULL)
+
4715 return NULL;
+
4716 a.zbuffer = (stbi_uc *)buffer;
+
4717 a.zbuffer_end = (stbi_uc *)buffer + len;
+
4718 if (stbi__do_zlib(&a, p, initial_size, 1, 1)) {
+
4719 if (outlen)
+
4720 *outlen = (int)(a.zout - a.zout_start);
+
4721 return a.zout_start;
+
4722 } else {
+
4723 STBI_FREE(a.zout_start);
+
4724 return NULL;
+
4725 }
+
4726}
+
4727
+
4728STBIDEF char * stbi_zlib_decode_malloc(char const * buffer, int len, int * outlen) {
+
4729 return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen);
+
4730}
+
4731
+
4732STBIDEF char * stbi_zlib_decode_malloc_guesssize_headerflag(const char * buffer, int len, int initial_size, int * outlen,
+
4733 int parse_header) {
+
4734 stbi__zbuf a;
+
4735 char * p = (char *)stbi__malloc(initial_size);
+
4736 if (p == NULL)
+
4737 return NULL;
+
4738 a.zbuffer = (stbi_uc *)buffer;
+
4739 a.zbuffer_end = (stbi_uc *)buffer + len;
+
4740 if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) {
+
4741 if (outlen)
+
4742 *outlen = (int)(a.zout - a.zout_start);
+
4743 return a.zout_start;
+
4744 } else {
+
4745 STBI_FREE(a.zout_start);
+
4746 return NULL;
+
4747 }
+
4748}
+
4749
+
4750STBIDEF int stbi_zlib_decode_buffer(char * obuffer, int olen, char const * ibuffer, int ilen) {
+
4751 stbi__zbuf a;
+
4752 a.zbuffer = (stbi_uc *)ibuffer;
+
4753 a.zbuffer_end = (stbi_uc *)ibuffer + ilen;
+
4754 if (stbi__do_zlib(&a, obuffer, olen, 0, 1))
+
4755 return (int)(a.zout - a.zout_start);
+
4756 else
+
4757 return -1;
+
4758}
+
4759
+
4760STBIDEF char * stbi_zlib_decode_noheader_malloc(char const * buffer, int len, int * outlen) {
+
4761 stbi__zbuf a;
+
4762 char * p = (char *)stbi__malloc(16384);
+
4763 if (p == NULL)
+
4764 return NULL;
+
4765 a.zbuffer = (stbi_uc *)buffer;
+
4766 a.zbuffer_end = (stbi_uc *)buffer + len;
+
4767 if (stbi__do_zlib(&a, p, 16384, 1, 0)) {
+
4768 if (outlen)
+
4769 *outlen = (int)(a.zout - a.zout_start);
+
4770 return a.zout_start;
+
4771 } else {
+
4772 STBI_FREE(a.zout_start);
+
4773 return NULL;
+
4774 }
+
4775}
+
4776
+
4777STBIDEF int stbi_zlib_decode_noheader_buffer(char * obuffer, int olen, const char * ibuffer, int ilen) {
+
4778 stbi__zbuf a;
+
4779 a.zbuffer = (stbi_uc *)ibuffer;
+
4780 a.zbuffer_end = (stbi_uc *)ibuffer + ilen;
+
4781 if (stbi__do_zlib(&a, obuffer, olen, 0, 0))
+
4782 return (int)(a.zout - a.zout_start);
+
4783 else
+
4784 return -1;
+
4785}
+
4786#endif
+
4787
+
4788// public domain "baseline" PNG decoder v0.10 Sean Barrett 2006-11-18
+
4789// simple implementation
+
4790// - only 8-bit samples
+
4791// - no CRC checking
+
4792// - allocates lots of intermediate memory
+
4793// - avoids problem of streaming data between subsystems
+
4794// - avoids explicit window management
+
4795// performance
+
4796// - uses stb_zlib, a PD zlib implementation with fast huffman decoding
+
4797
+
4798#ifndef STBI_NO_PNG
+
4799typedef struct {
+
4800 stbi__uint32 length;
+ + +
4803
+ + +
4806 c.length = stbi__get32be(s);
+
4807 c.type = stbi__get32be(s);
+
4808 return c;
+
4809}
+
4810
+ +
4812 static const stbi_uc png_sig[8] = {137, 80, 78, 71, 13, 10, 26, 10};
+
4813 int i;
+
4814 for (i = 0; i < 8; ++i)
+
4815 if (stbi__get8(s) != png_sig[i])
+
4816 return stbi__err("bad png sig", "Not a PNG");
+
4817 return 1;
+
4818}
+
4819
+
4820typedef struct {
+
4821 stbi__context * s;
+ +
4823 int depth;
+
4824} stbi__png;
+
4825
+
4826enum {
+
4827 STBI__F_none = 0,
+
4828 STBI__F_sub = 1,
+
4829 STBI__F_up = 2,
+
4830 STBI__F_avg = 3,
+
4831 STBI__F_paeth = 4,
+
4832 // synthetic filters used for first scanline to avoid needing a dummy row of 0s
+ + +
4835};
+
4836
+ +
4838
+
4839static int stbi__paeth(int a, int b, int c) {
+
4840 int p = a + b - c;
+
4841 int pa = abs(p - a);
+
4842 int pb = abs(p - b);
+
4843 int pc = abs(p - c);
+
4844 if (pa <= pb && pa <= pc)
+
4845 return a;
+
4846 if (pb <= pc)
+
4847 return b;
+
4848 return c;
+
4849}
+
4850
+
4851static const stbi_uc stbi__depth_scale_table[9] = {0, 0xff, 0x55, 0, 0x11, 0, 0, 0, 0x01};
+
4852
+
4853// create the png data from post-deflated data
+ +
4855 stbi__uint32 y, int depth, int color) {
+
4856 int bytes = (depth == 16 ? 2 : 1);
+
4857 stbi__context * s = a->s;
+
4858 stbi__uint32 i, j, stride = x * out_n * bytes;
+ +
4860 int k;
+
4861 int img_n = s->img_n; // copy it into a local for later
+
4862
+
4863 int output_bytes = out_n * bytes;
+
4864 int filter_bytes = img_n * bytes;
+
4865 int width = x;
+
4866
+
4867 STBI_ASSERT(out_n == s->img_n || out_n == s->img_n + 1);
+
4868 a->out = (stbi_uc *)stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into
+
4869 if (!a->out)
+
4870 return stbi__err("outofmem", "Out of memory");
+
4871
+ +
4873 return stbi__err("too large", "Corrupt PNG");
+
4874 img_width_bytes = (((img_n * x * depth) + 7) >> 3);
+
4875 img_len = (img_width_bytes + 1) * y;
+
4876
+
4877 // we used to check for exact match between raw_len and img_len on non-interlaced PNGs,
+
4878 // but issue #276 reported a PNG in the wild that had extra data at the end (all zeros),
+
4879 // so just check for raw_len < img_len always.
+
4880 if (raw_len < img_len)
+
4881 return stbi__err("not enough pixels", "Corrupt PNG");
+
4882
+
4883 for (j = 0; j < y; ++j) {
+
4884 stbi_uc * cur = a->out + stride * j;
+
4885 stbi_uc * prior;
+
4886 int filter = *raw++;
+
4887
+
4888 if (filter > 4)
+
4889 return stbi__err("invalid filter", "Corrupt PNG");
+
4890
+
4891 if (depth < 8) {
+
4892 if (img_width_bytes > x)
+
4893 return stbi__err("invalid width", "Corrupt PNG");
+
4894 cur += x * out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place
+
4895 filter_bytes = 1;
+ +
4897 }
+
4898 prior = cur - stride; // bugfix: need to compute this after 'cur +=' computation above
+
4899
+
4900 // if first row, use special filter that doesn't sample previous row
+
4901 if (j == 0)
+ +
4903
+
4904 // handle first byte explicitly
+
4905 for (k = 0; k < filter_bytes; ++k) {
+
4906 switch (filter) {
+
4907 case STBI__F_none:
+
4908 cur[k] = raw[k];
+
4909 break;
+
4910 case STBI__F_sub:
+
4911 cur[k] = raw[k];
+
4912 break;
+
4913 case STBI__F_up:
+
4914 cur[k] = STBI__BYTECAST(raw[k] + prior[k]);
+
4915 break;
+
4916 case STBI__F_avg:
+
4917 cur[k] = STBI__BYTECAST(raw[k] + (prior[k] >> 1));
+
4918 break;
+
4919 case STBI__F_paeth:
+
4920 cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0, prior[k], 0));
+
4921 break;
+
4922 case STBI__F_avg_first:
+
4923 cur[k] = raw[k];
+
4924 break;
+ +
4926 cur[k] = raw[k];
+
4927 break;
+
4928 }
+
4929 }
+
4930
+
4931 if (depth == 8) {
+
4932 if (img_n != out_n)
+
4933 cur[img_n] = 255; // first pixel
+
4934 raw += img_n;
+
4935 cur += out_n;
+
4936 prior += out_n;
+
4937 } else if (depth == 16) {
+
4938 if (img_n != out_n) {
+
4939 cur[filter_bytes] = 255; // first pixel top byte
+
4940 cur[filter_bytes + 1] = 255; // first pixel bottom byte
+
4941 }
+
4942 raw += filter_bytes;
+
4943 cur += output_bytes;
+ +
4945 } else {
+
4946 raw += 1;
+
4947 cur += 1;
+
4948 prior += 1;
+
4949 }
+
4950
+
4951 // this is a little gross, so that we don't switch per-pixel or per-component
+
4952 if (depth < 8 || img_n == out_n) {
+
4953 int nk = (width - 1) * filter_bytes;
+
4954#define STBI__CASE(f) \
+
4955 case f: \
+
4956 for (k = 0; k < nk; ++k)
+
4957 switch (filter) {
+
4958 // "none" filter turns into a memcpy here; make that explicit.
+
4959 case STBI__F_none:
+
4960 memcpy(cur, raw, nk);
+
4961 break;
+ +
4963 break;
+
4964 STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); }
+
4965 break;
+
4966 STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k - filter_bytes]) >> 1)); }
+
4967 break;
+ + +
4970 }
+
4971 break;
+ +
4973 break;
+ +
4975 break;
+
4976 }
+
4977#undef STBI__CASE
+
4978 raw += nk;
+
4979 } else {
+
4980 STBI_ASSERT(img_n + 1 == out_n);
+
4981#define STBI__CASE(f) \
+
4982 case f: \
+
4983 for (i = x - 1; i >= 1; --i, cur[filter_bytes] = 255, raw += filter_bytes, cur += output_bytes, prior += output_bytes) \
+
4984 for (k = 0; k < filter_bytes; ++k)
+
4985 switch (filter) {
+
4986 STBI__CASE(STBI__F_none) { cur[k] = raw[k]; }
+
4987 break;
+ +
4989 break;
+
4990 STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); }
+
4991 break;
+
4992 STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k - output_bytes]) >> 1)); }
+
4993 break;
+ + +
4996 }
+
4997 break;
+ +
4999 break;
+ +
5001 break;
+
5002 }
+
5003#undef STBI__CASE
+
5004
+
5005 // the loop above sets the high byte of the pixels' alpha, but for
+
5006 // 16 bit png files we also need the low byte set. we'll do that here.
+
5007 if (depth == 16) {
+
5008 cur = a->out + stride * j; // start at the beginning of the row again
+
5009 for (i = 0; i < x; ++i, cur += output_bytes) {
+
5010 cur[filter_bytes + 1] = 255;
+
5011 }
+
5012 }
+
5013 }
+
5014 }
+
5015
+
5016 // we make a separate pass to expand bits to pixels; for performance,
+
5017 // this could run two scanlines behind the above code, so it won't
+
5018 // intefere with filtering but will still be in the cache.
+
5019 if (depth < 8) {
+
5020 for (j = 0; j < y; ++j) {
+
5021 stbi_uc * cur = a->out + stride * j;
+
5022 stbi_uc * in = a->out + stride * j + x * out_n - img_width_bytes;
+
5023 // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for
+
5024 // 1/2/4-bit png guarante byte alignment, if width is not multiple of 8/4/2 we'll decode dummy trailing data that
+
5025 // will be skipped in the later loop
+
5026 stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range
+
5027
+
5028 // note that the final byte might overshoot and write more data than desired.
+
5029 // we can allocate enough data that this never writes out of memory, but it
+
5030 // could also overwrite the next scanline. can it overwrite non-empty data
+
5031 // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel.
+
5032 // so we need to explicitly clamp the final ones
+
5033
+
5034 if (depth == 4) {
+
5035 for (k = x * img_n; k >= 2; k -= 2, ++in) {
+
5036 *cur++ = scale * ((*in >> 4));
+
5037 *cur++ = scale * ((*in) & 0x0f);
+
5038 }
+
5039 if (k > 0)
+
5040 *cur++ = scale * ((*in >> 4));
+
5041 } else if (depth == 2) {
+
5042 for (k = x * img_n; k >= 4; k -= 4, ++in) {
+
5043 *cur++ = scale * ((*in >> 6));
+
5044 *cur++ = scale * ((*in >> 4) & 0x03);
+
5045 *cur++ = scale * ((*in >> 2) & 0x03);
+
5046 *cur++ = scale * ((*in) & 0x03);
+
5047 }
+
5048 if (k > 0)
+
5049 *cur++ = scale * ((*in >> 6));
+
5050 if (k > 1)
+
5051 *cur++ = scale * ((*in >> 4) & 0x03);
+
5052 if (k > 2)
+
5053 *cur++ = scale * ((*in >> 2) & 0x03);
+
5054 } else if (depth == 1) {
+
5055 for (k = x * img_n; k >= 8; k -= 8, ++in) {
+
5056 *cur++ = scale * ((*in >> 7));
+
5057 *cur++ = scale * ((*in >> 6) & 0x01);
+
5058 *cur++ = scale * ((*in >> 5) & 0x01);
+
5059 *cur++ = scale * ((*in >> 4) & 0x01);
+
5060 *cur++ = scale * ((*in >> 3) & 0x01);
+
5061 *cur++ = scale * ((*in >> 2) & 0x01);
+
5062 *cur++ = scale * ((*in >> 1) & 0x01);
+
5063 *cur++ = scale * ((*in) & 0x01);
+
5064 }
+
5065 if (k > 0)
+
5066 *cur++ = scale * ((*in >> 7));
+
5067 if (k > 1)
+
5068 *cur++ = scale * ((*in >> 6) & 0x01);
+
5069 if (k > 2)
+
5070 *cur++ = scale * ((*in >> 5) & 0x01);
+
5071 if (k > 3)
+
5072 *cur++ = scale * ((*in >> 4) & 0x01);
+
5073 if (k > 4)
+
5074 *cur++ = scale * ((*in >> 3) & 0x01);
+
5075 if (k > 5)
+
5076 *cur++ = scale * ((*in >> 2) & 0x01);
+
5077 if (k > 6)
+
5078 *cur++ = scale * ((*in >> 1) & 0x01);
+
5079 }
+
5080 if (img_n != out_n) {
+
5081 int q;
+
5082 // insert alpha = 255
+
5083 cur = a->out + stride * j;
+
5084 if (img_n == 1) {
+
5085 for (q = x - 1; q >= 0; --q) {
+
5086 cur[q * 2 + 1] = 255;
+
5087 cur[q * 2 + 0] = cur[q];
+
5088 }
+
5089 } else {
+
5090 STBI_ASSERT(img_n == 3);
+
5091 for (q = x - 1; q >= 0; --q) {
+
5092 cur[q * 4 + 3] = 255;
+
5093 cur[q * 4 + 2] = cur[q * 3 + 2];
+
5094 cur[q * 4 + 1] = cur[q * 3 + 1];
+
5095 cur[q * 4 + 0] = cur[q * 3 + 0];
+
5096 }
+
5097 }
+
5098 }
+
5099 }
+
5100 } else if (depth == 16) {
+
5101 // force the image data from big-endian to platform-native.
+
5102 // this is done in a separate pass due to the decoding relying
+
5103 // on the data being untouched, but could probably be done
+
5104 // per-line during decode if care is taken.
+
5105 stbi_uc * cur = a->out;
+ +
5107
+
5108 for (i = 0; i < x * y * out_n; ++i, cur16++, cur += 2) {
+
5109 *cur16 = (cur[0] << 8) | cur[1];
+
5110 }
+
5111 }
+
5112
+
5113 return 1;
+
5114}
+
5115
+ +
5117 int color, int interlaced) {
+
5118 int bytes = (depth == 16 ? 2 : 1);
+
5119 int out_bytes = out_n * bytes;
+
5120 stbi_uc * final;
+
5121 int p;
+
5122 if (!interlaced)
+
5123 return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color);
+
5124
+
5125 // de-interlacing
+
5126 final = (stbi_uc *)stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0);
+
5127 if (!final)
+
5128 return stbi__err("outofmem", "Out of memory");
+
5129 for (p = 0; p < 7; ++p) {
+
5130 int xorig[] = {0, 4, 0, 2, 0, 1, 0};
+
5131 int yorig[] = {0, 0, 4, 0, 2, 0, 1};
+
5132 int xspc[] = {8, 8, 4, 4, 2, 2, 1};
+
5133 int yspc[] = {8, 8, 8, 4, 4, 2, 2};
+
5134 int i, j, x, y;
+
5135 // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1
+
5136 x = (a->s->img_x - xorig[p] + xspc[p] - 1) / xspc[p];
+
5137 y = (a->s->img_y - yorig[p] + yspc[p] - 1) / yspc[p];
+
5138 if (x && y) {
+
5139 stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y;
+ +
5141 STBI_FREE(final);
+
5142 return 0;
+
5143 }
+
5144 for (j = 0; j < y; ++j) {
+
5145 for (i = 0; i < x; ++i) {
+
5146 int out_y = j * yspc[p] + yorig[p];
+
5147 int out_x = i * xspc[p] + xorig[p];
+
5148 memcpy(final + out_y * a->s->img_x * out_bytes + out_x * out_bytes, a->out + (j * x + i) * out_bytes,
+
5149 out_bytes);
+
5150 }
+
5151 }
+
5152 STBI_FREE(a->out);
+ + +
5155 }
+
5156 }
+
5157 a->out = final;
+
5158
+
5159 return 1;
+
5160}
+
5161
+
5162static int stbi__compute_transparency(stbi__png * z, stbi_uc tc[3], int out_n) {
+
5163 stbi__context * s = z->s;
+
5164 stbi__uint32 i, pixel_count = s->img_x * s->img_y;
+
5165 stbi_uc * p = z->out;
+
5166
+
5167 // compute color-based transparency, assuming we've
+
5168 // already got 255 as the alpha value in the output
+
5169 STBI_ASSERT(out_n == 2 || out_n == 4);
+
5170
+
5171 if (out_n == 2) {
+
5172 for (i = 0; i < pixel_count; ++i) {
+
5173 p[1] = (p[0] == tc[0] ? 0 : 255);
+
5174 p += 2;
+
5175 }
+
5176 } else {
+
5177 for (i = 0; i < pixel_count; ++i) {
+
5178 if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
+
5179 p[3] = 0;
+
5180 p += 4;
+
5181 }
+
5182 }
+
5183 return 1;
+
5184}
+
5185
+ +
5187 stbi__context * s = z->s;
+
5188 stbi__uint32 i, pixel_count = s->img_x * s->img_y;
+
5189 stbi__uint16 * p = (stbi__uint16 *)z->out;
+
5190
+
5191 // compute color-based transparency, assuming we've
+
5192 // already got 65535 as the alpha value in the output
+
5193 STBI_ASSERT(out_n == 2 || out_n == 4);
+
5194
+
5195 if (out_n == 2) {
+
5196 for (i = 0; i < pixel_count; ++i) {
+
5197 p[1] = (p[0] == tc[0] ? 0 : 65535);
+
5198 p += 2;
+
5199 }
+
5200 } else {
+
5201 for (i = 0; i < pixel_count; ++i) {
+
5202 if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
+
5203 p[3] = 0;
+
5204 p += 4;
+
5205 }
+
5206 }
+
5207 return 1;
+
5208}
+
5209
+
5210static int stbi__expand_png_palette(stbi__png * a, stbi_uc * palette, int len, int pal_img_n) {
+
5211 stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y;
+
5212 stbi_uc *p, *temp_out, *orig = a->out;
+
5213
+ +
5215 if (p == NULL)
+
5216 return stbi__err("outofmem", "Out of memory");
+
5217
+
5218 // between here and free(out) below, exitting would leak
+
5219 temp_out = p;
+
5220
+
5221 if (pal_img_n == 3) {
+
5222 for (i = 0; i < pixel_count; ++i) {
+
5223 int n = orig[i] * 4;
+
5224 p[0] = palette[n];
+
5225 p[1] = palette[n + 1];
+
5226 p[2] = palette[n + 2];
+
5227 p += 3;
+
5228 }
+
5229 } else {
+
5230 for (i = 0; i < pixel_count; ++i) {
+
5231 int n = orig[i] * 4;
+
5232 p[0] = palette[n];
+
5233 p[1] = palette[n + 1];
+
5234 p[2] = palette[n + 2];
+
5235 p[3] = palette[n + 3];
+
5236 p += 4;
+
5237 }
+
5238 }
+
5239 STBI_FREE(a->out);
+
5240 a->out = temp_out;
+
5241
+ +
5243
+
5244 return 1;
+
5245}
+
5246
+ +
5248static int stbi__de_iphone_flag_global = 0;
+
5249
+
5250STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply) {
+ +
5252}
+
5253
+
5254STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert) {
+ +
5256}
+
5257
+
5258#ifndef STBI_THREAD_LOCAL
+
5259#define stbi__unpremultiply_on_load stbi__unpremultiply_on_load_global
+
5260#define stbi__de_iphone_flag stbi__de_iphone_flag_global
+
5261#else
+ + +
5264
+
5265STBIDEF void stbi_set_unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply) {
+ + +
5268}
+
5269
+
5270STBIDEF void stbi_convert_iphone_png_to_rgb_thread(int flag_true_if_should_convert) {
+ + +
5273}
+
5274
+
5275#define stbi__unpremultiply_on_load \
+
5276 (stbi__unpremultiply_on_load_set ? stbi__unpremultiply_on_load_local : stbi__unpremultiply_on_load_global)
+
5277#define stbi__de_iphone_flag (stbi__de_iphone_flag_set ? stbi__de_iphone_flag_local : stbi__de_iphone_flag_global)
+
5278#endif // STBI_THREAD_LOCAL
+
5279
+
5280static void stbi__de_iphone(stbi__png * z) {
+
5281 stbi__context * s = z->s;
+
5282 stbi__uint32 i, pixel_count = s->img_x * s->img_y;
+
5283 stbi_uc * p = z->out;
+
5284
+
5285 if (s->img_out_n == 3) { // convert bgr to rgb
+
5286 for (i = 0; i < pixel_count; ++i) {
+
5287 stbi_uc t = p[0];
+
5288 p[0] = p[2];
+
5289 p[2] = t;
+
5290 p += 3;
+
5291 }
+
5292 } else {
+
5293 STBI_ASSERT(s->img_out_n == 4);
+ +
5295 // convert bgr to rgb and unpremultiply
+
5296 for (i = 0; i < pixel_count; ++i) {
+
5297 stbi_uc a = p[3];
+
5298 stbi_uc t = p[0];
+
5299 if (a) {
+
5300 stbi_uc half = a / 2;
+
5301 p[0] = (p[2] * 255 + half) / a;
+
5302 p[1] = (p[1] * 255 + half) / a;
+
5303 p[2] = (t * 255 + half) / a;
+
5304 } else {
+
5305 p[0] = p[2];
+
5306 p[2] = t;
+
5307 }
+
5308 p += 4;
+
5309 }
+
5310 } else {
+
5311 // convert bgr to rgb
+
5312 for (i = 0; i < pixel_count; ++i) {
+
5313 stbi_uc t = p[0];
+
5314 p[0] = p[2];
+
5315 p[2] = t;
+
5316 p += 4;
+
5317 }
+
5318 }
+
5319 }
+
5320}
+
5321
+
5322#define STBI__PNG_TYPE(a, b, c, d) (((unsigned)(a) << 24) + ((unsigned)(b) << 16) + ((unsigned)(c) << 8) + (unsigned)(d))
+
5323
+
5324static int stbi__parse_png_file(stbi__png * z, int scan, int req_comp) {
+
5325 stbi_uc palette[1024], pal_img_n = 0;
+
5326 stbi_uc has_trans = 0, tc[3] = {0};
+
5327 stbi__uint16 tc16[3];
+
5328 stbi__uint32 ioff = 0, idata_limit = 0, i, pal_len = 0;
+
5329 int first = 1, k, interlace = 0, color = 0, is_iphone = 0;
+
5330 stbi__context * s = z->s;
+
5331
+
5332 z->expanded = NULL;
+
5333 z->idata = NULL;
+
5334 z->out = NULL;
+
5335
+ +
5337 return 0;
+
5338
+
5339 if (scan == STBI__SCAN_type)
+
5340 return 1;
+
5341
+
5342 for (;;) {
+ +
5344 switch (c.type) {
+
5345 case STBI__PNG_TYPE('C', 'g', 'B', 'I'):
+
5346 is_iphone = 1;
+
5347 stbi__skip(s, c.length);
+
5348 break;
+
5349 case STBI__PNG_TYPE('I', 'H', 'D', 'R'): {
+
5350 int comp, filter;
+
5351 if (!first)
+
5352 return stbi__err("multiple IHDR", "Corrupt PNG");
+
5353 first = 0;
+
5354 if (c.length != 13)
+
5355 return stbi__err("bad IHDR len", "Corrupt PNG");
+
5356 s->img_x = stbi__get32be(s);
+
5357 s->img_y = stbi__get32be(s);
+
5358 if (s->img_y > STBI_MAX_DIMENSIONS)
+
5359 return stbi__err("too large", "Very large image (corrupt?)");
+
5360 if (s->img_x > STBI_MAX_DIMENSIONS)
+
5361 return stbi__err("too large", "Very large image (corrupt?)");
+
5362 z->depth = stbi__get8(s);
+
5363 if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16)
+
5364 return stbi__err("1/2/4/8/16-bit only", "PNG not supported: 1/2/4/8/16-bit only");
+
5365 color = stbi__get8(s);
+
5366 if (color > 6)
+
5367 return stbi__err("bad ctype", "Corrupt PNG");
+
5368 if (color == 3 && z->depth == 16)
+
5369 return stbi__err("bad ctype", "Corrupt PNG");
+
5370 if (color == 3)
+
5371 pal_img_n = 3;
+
5372 else if (color & 1)
+
5373 return stbi__err("bad ctype", "Corrupt PNG");
+
5374 comp = stbi__get8(s);
+
5375 if (comp)
+
5376 return stbi__err("bad comp method", "Corrupt PNG");
+
5377 filter = stbi__get8(s);
+
5378 if (filter)
+
5379 return stbi__err("bad filter method", "Corrupt PNG");
+ +
5381 if (interlace > 1)
+
5382 return stbi__err("bad interlace method", "Corrupt PNG");
+
5383 if (!s->img_x || !s->img_y)
+
5384 return stbi__err("0-pixel image", "Corrupt PNG");
+
5385 if (!pal_img_n) {
+
5386 s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
+
5387 if ((1 << 30) / s->img_x / s->img_n < s->img_y)
+
5388 return stbi__err("too large", "Image too large to decode");
+
5389 } else {
+
5390 // if paletted, then pal_n is our final components, and
+
5391 // img_n is # components to decompress/filter.
+
5392 s->img_n = 1;
+
5393 if ((1 << 30) / s->img_x / 4 < s->img_y)
+
5394 return stbi__err("too large", "Corrupt PNG");
+
5395 }
+
5396 // even with SCAN_header, have to scan to see if we have a tRNS
+
5397 break;
+
5398 }
+
5399
+
5400 case STBI__PNG_TYPE('P', 'L', 'T', 'E'): {
+
5401 if (first)
+
5402 return stbi__err("first not IHDR", "Corrupt PNG");
+
5403 if (c.length > 256 * 3)
+
5404 return stbi__err("invalid PLTE", "Corrupt PNG");
+
5405 pal_len = c.length / 3;
+
5406 if (pal_len * 3 != c.length)
+
5407 return stbi__err("invalid PLTE", "Corrupt PNG");
+
5408 for (i = 0; i < pal_len; ++i) {
+
5409 palette[i * 4 + 0] = stbi__get8(s);
+
5410 palette[i * 4 + 1] = stbi__get8(s);
+
5411 palette[i * 4 + 2] = stbi__get8(s);
+
5412 palette[i * 4 + 3] = 255;
+
5413 }
+
5414 break;
+
5415 }
+
5416
+
5417 case STBI__PNG_TYPE('t', 'R', 'N', 'S'): {
+
5418 if (first)
+
5419 return stbi__err("first not IHDR", "Corrupt PNG");
+
5420 if (z->idata)
+
5421 return stbi__err("tRNS after IDAT", "Corrupt PNG");
+
5422 if (pal_img_n) {
+
5423 if (scan == STBI__SCAN_header) {
+
5424 s->img_n = 4;
+
5425 return 1;
+
5426 }
+
5427 if (pal_len == 0)
+
5428 return stbi__err("tRNS before PLTE", "Corrupt PNG");
+
5429 if (c.length > pal_len)
+
5430 return stbi__err("bad tRNS len", "Corrupt PNG");
+
5431 pal_img_n = 4;
+
5432 for (i = 0; i < c.length; ++i)
+
5433 palette[i * 4 + 3] = stbi__get8(s);
+
5434 } else {
+
5435 if (!(s->img_n & 1))
+
5436 return stbi__err("tRNS with alpha", "Corrupt PNG");
+
5437 if (c.length != (stbi__uint32)s->img_n * 2)
+
5438 return stbi__err("bad tRNS len", "Corrupt PNG");
+
5439 has_trans = 1;
+
5440 // non-paletted with tRNS = constant alpha. if header-scanning, we can stop now.
+
5441 if (scan == STBI__SCAN_header) {
+
5442 ++s->img_n;
+
5443 return 1;
+
5444 }
+
5445 if (z->depth == 16) {
+
5446 for (k = 0; k < s->img_n; ++k)
+
5447 tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is
+
5448 } else {
+
5449 for (k = 0; k < s->img_n; ++k)
+
5450 tc[k] = (stbi_uc)(stbi__get16be(s) & 255) *
+
5451 stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger
+
5452 }
+
5453 }
+
5454 break;
+
5455 }
+
5456
+
5457 case STBI__PNG_TYPE('I', 'D', 'A', 'T'): {
+
5458 if (first)
+
5459 return stbi__err("first not IHDR", "Corrupt PNG");
+
5460 if (pal_img_n && !pal_len)
+
5461 return stbi__err("no PLTE", "Corrupt PNG");
+
5462 if (scan == STBI__SCAN_header) {
+
5463 // header scan definitely stops at first IDAT
+
5464 if (pal_img_n)
+
5465 s->img_n = pal_img_n;
+
5466 return 1;
+
5467 }
+
5468 if (c.length > (1u << 30))
+
5469 return stbi__err("IDAT size limit", "IDAT section larger than 2^30 bytes");
+
5470 if ((int)(ioff + c.length) < (int)ioff)
+
5471 return 0;
+
5472 if (ioff + c.length > idata_limit) {
+ +
5474 stbi_uc * p;
+
5475 if (idata_limit == 0)
+
5476 idata_limit = c.length > 4096 ? c.length : 4096;
+
5477 while (ioff + c.length > idata_limit)
+
5478 idata_limit *= 2;
+ + +
5481 if (p == NULL)
+
5482 return stbi__err("outofmem", "Out of memory");
+
5483 z->idata = p;
+
5484 }
+
5485 if (!stbi__getn(s, z->idata + ioff, c.length))
+
5486 return stbi__err("outofdata", "Corrupt PNG");
+
5487 ioff += c.length;
+
5488 break;
+
5489 }
+
5490
+
5491 case STBI__PNG_TYPE('I', 'E', 'N', 'D'): {
+ +
5493 if (first)
+
5494 return stbi__err("first not IHDR", "Corrupt PNG");
+
5495 if (scan != STBI__SCAN_load)
+
5496 return 1;
+
5497 if (z->idata == NULL)
+
5498 return stbi__err("no IDAT", "Corrupt PNG");
+
5499 // initial guess for decoded data size to avoid unnecessary reallocs
+
5500 bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component
+
5501 raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */;
+
5502 z->expanded = (stbi_uc *)stbi_zlib_decode_malloc_guesssize_headerflag((char *)z->idata, ioff, raw_len,
+
5503 (int *)&raw_len, !is_iphone);
+
5504 if (z->expanded == NULL)
+
5505 return 0; // zlib should set error
+
5506 STBI_FREE(z->idata);
+
5507 z->idata = NULL;
+
5508 if ((req_comp == s->img_n + 1 && req_comp != 3 && !pal_img_n) || has_trans)
+
5509 s->img_out_n = s->img_n + 1;
+
5510 else
+
5511 s->img_out_n = s->img_n;
+
5512 if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace))
+
5513 return 0;
+
5514 if (has_trans) {
+
5515 if (z->depth == 16) {
+
5516 if (!stbi__compute_transparency16(z, tc16, s->img_out_n))
+
5517 return 0;
+
5518 } else {
+
5519 if (!stbi__compute_transparency(z, tc, s->img_out_n))
+
5520 return 0;
+
5521 }
+
5522 }
+
5523 if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2)
+ +
5525 if (pal_img_n) {
+
5526 // pal_img_n == 3 or 4
+
5527 s->img_n = pal_img_n; // record the actual colors we had
+
5528 s->img_out_n = pal_img_n;
+
5529 if (req_comp >= 3)
+
5530 s->img_out_n = req_comp;
+
5531 if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n))
+
5532 return 0;
+
5533 } else if (has_trans) {
+
5534 // non-paletted image with tRNS -> source image has (constant) alpha
+
5535 ++s->img_n;
+
5536 }
+
5537 STBI_FREE(z->expanded);
+
5538 z->expanded = NULL;
+
5539 // end of PNG chunk, read and skip CRC
+ +
5541 return 1;
+
5542 }
+
5543
+
5544 default:
+
5545 // if critical, fail
+
5546 if (first)
+
5547 return stbi__err("first not IHDR", "Corrupt PNG");
+
5548 if ((c.type & (1 << 29)) == 0) {
+
5549#ifndef STBI_NO_FAILURE_STRINGS
+
5550 // not threadsafe
+
5551 static char invalid_chunk[] = "XXXX PNG chunk not known";
+
5552 invalid_chunk[0] = STBI__BYTECAST(c.type >> 24);
+
5553 invalid_chunk[1] = STBI__BYTECAST(c.type >> 16);
+
5554 invalid_chunk[2] = STBI__BYTECAST(c.type >> 8);
+
5555 invalid_chunk[3] = STBI__BYTECAST(c.type >> 0);
+
5556#endif
+
5557 return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type");
+
5558 }
+
5559 stbi__skip(s, c.length);
+
5560 break;
+
5561 }
+
5562 // end of PNG chunk, read and skip CRC
+ +
5564 }
+
5565}
+
5566
+
5567static void * stbi__do_png(stbi__png * p, int * x, int * y, int * n, int req_comp, stbi__result_info * ri) {
+
5568 void * result = NULL;
+ +
5570 return stbi__errpuc("bad req_comp", "Internal error");
+ +
5572 if (p->depth <= 8)
+
5573 ri->bits_per_channel = 8;
+
5574 else if (p->depth == 16)
+
5575 ri->bits_per_channel = 16;
+
5576 else
+
5577 return stbi__errpuc("bad bits_per_channel", "PNG not supported: unsupported color depth");
+
5578 result = p->out;
+
5579 p->out = NULL;
+
5580 if (req_comp && req_comp != p->s->img_out_n) {
+
5581 if (ri->bits_per_channel == 8)
+
5582 result = stbi__convert_format((unsigned char *)result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
+
5583 else
+
5584 result = stbi__convert_format16((stbi__uint16 *)result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
+
5585 p->s->img_out_n = req_comp;
+
5586 if (result == NULL)
+
5587 return result;
+
5588 }
+
5589 *x = p->s->img_x;
+
5590 *y = p->s->img_y;
+
5591 if (n)
+
5592 *n = p->s->img_n;
+
5593 }
+
5594 STBI_FREE(p->out);
+
5595 p->out = NULL;
+
5596 STBI_FREE(p->expanded);
+
5597 p->expanded = NULL;
+
5598 STBI_FREE(p->idata);
+
5599 p->idata = NULL;
+
5600
+
5601 return result;
+
5602}
+
5603
+
5604static void * stbi__png_load(stbi__context * s, int * x, int * y, int * comp, int req_comp, stbi__result_info * ri) {
+
5605 stbi__png p;
+
5606 p.s = s;
+
5607 return stbi__do_png(&p, x, y, comp, req_comp, ri);
+
5608}
+
5609
+
5610static int stbi__png_test(stbi__context * s) {
+
5611 int r;
+ +
5613 stbi__rewind(s);
+
5614 return r;
+
5615}
+
5616
+
5617static int stbi__png_info_raw(stbi__png * p, int * x, int * y, int * comp) {
+ +
5619 stbi__rewind(p->s);
+
5620 return 0;
+
5621 }
+
5622 if (x)
+
5623 *x = p->s->img_x;
+
5624 if (y)
+
5625 *y = p->s->img_y;
+
5626 if (comp)
+
5627 *comp = p->s->img_n;
+
5628 return 1;
+
5629}
+
5630
+
5631static int stbi__png_info(stbi__context * s, int * x, int * y, int * comp) {
+
5632 stbi__png p;
+
5633 p.s = s;
+
5634 return stbi__png_info_raw(&p, x, y, comp);
+
5635}
+
5636
+
5637static int stbi__png_is16(stbi__context * s) {
+
5638 stbi__png p;
+
5639 p.s = s;
+
5640 if (!stbi__png_info_raw(&p, NULL, NULL, NULL))
+
5641 return 0;
+
5642 if (p.depth != 16) {
+
5643 stbi__rewind(p.s);
+
5644 return 0;
+
5645 }
+
5646 return 1;
+
5647}
+
5648#endif
+
5649
+
5650// Microsoft/Windows BMP image
+
5651
+
5652#ifndef STBI_NO_BMP
+
5653static int stbi__bmp_test_raw(stbi__context * s) {
+
5654 int r;
+
5655 int sz;
+
5656 if (stbi__get8(s) != 'B')
+
5657 return 0;
+
5658 if (stbi__get8(s) != 'M')
+
5659 return 0;
+
5660 stbi__get32le(s); // discard filesize
+
5661 stbi__get16le(s); // discard reserved
+
5662 stbi__get16le(s); // discard reserved
+
5663 stbi__get32le(s); // discard data offset
+
5664 sz = stbi__get32le(s);
+
5665 r = (sz == 12 || sz == 40 || sz == 56 || sz == 108 || sz == 124);
+
5666 return r;
+
5667}
+
5668
+
5669static int stbi__bmp_test(stbi__context * s) {
+
5670 int r = stbi__bmp_test_raw(s);
+
5671 stbi__rewind(s);
+
5672 return r;
+
5673}
+
5674
+
5675// returns 0..31 for the highest set bit
+
5676static int stbi__high_bit(unsigned int z) {
+
5677 int n = 0;
+
5678 if (z == 0)
+
5679 return -1;
+
5680 if (z >= 0x10000) {
+
5681 n += 16;
+
5682 z >>= 16;
+
5683 }
+
5684 if (z >= 0x00100) {
+
5685 n += 8;
+
5686 z >>= 8;
+
5687 }
+
5688 if (z >= 0x00010) {
+
5689 n += 4;
+
5690 z >>= 4;
+
5691 }
+
5692 if (z >= 0x00004) {
+
5693 n += 2;
+
5694 z >>= 2;
+
5695 }
+
5696 if (z >= 0x00002) {
+
5697 n += 1; /* >>= 1;*/
+
5698 }
+
5699 return n;
+
5700}
+
5701
+
5702static int stbi__bitcount(unsigned int a) {
+
5703 a = (a & 0x55555555) + ((a >> 1) & 0x55555555); // max 2
+
5704 a = (a & 0x33333333) + ((a >> 2) & 0x33333333); // max 4
+
5705 a = (a + (a >> 4)) & 0x0f0f0f0f; // max 8 per 4, now 8 bits
+
5706 a = (a + (a >> 8)); // max 16 per 8 bits
+
5707 a = (a + (a >> 16)); // max 32 per 8 bits
+
5708 return a & 0xff;
+
5709}
+
5710
+
5711// extract an arbitrarily-aligned N-bit value (N=bits)
+
5712// from v, and then make it 8-bits long and fractionally
+
5713// extend it to full full range.
+
5714static int stbi__shiftsigned(unsigned int v, int shift, int bits) {
+
5715 static unsigned int mul_table[9] = {
+
5716 0,
+
5717 0xff /*0b11111111*/,
+
5718 0x55 /*0b01010101*/,
+
5719 0x49 /*0b01001001*/,
+
5720 0x11 /*0b00010001*/,
+
5721 0x21 /*0b00100001*/,
+
5722 0x41 /*0b01000001*/,
+
5723 0x81 /*0b10000001*/,
+
5724 0x01 /*0b00000001*/,
+
5725 };
+
5726 static unsigned int shift_table[9] = {
+
5727 0, 0, 0, 1, 0, 2, 4, 6, 0,
+
5728 };
+
5729 if (shift < 0)
+
5730 v <<= -shift;
+
5731 else
+
5732 v >>= shift;
+
5733 STBI_ASSERT(v < 256);
+
5734 v >>= (8 - bits);
+
5735 STBI_ASSERT(bits >= 0 && bits <= 8);
+
5736 return (int)((unsigned)v * mul_table[bits]) >> shift_table[bits];
+
5737}
+
5738
+
5739typedef struct {
+
5740 int bpp, offset, hsz;
+
5741 unsigned int mr, mg, mb, ma, all_a;
+
5742 int extra_read;
+ +
5744
+ +
5746 // BI_BITFIELDS specifies masks explicitly, don't override
+
5747 if (compress == 3)
+
5748 return 1;
+
5749
+
5750 if (compress == 0) {
+
5751 if (info->bpp == 16) {
+
5752 info->mr = 31u << 10;
+
5753 info->mg = 31u << 5;
+
5754 info->mb = 31u << 0;
+
5755 } else if (info->bpp == 32) {
+
5756 info->mr = 0xffu << 16;
+
5757 info->mg = 0xffu << 8;
+
5758 info->mb = 0xffu << 0;
+
5759 info->ma = 0xffu << 24;
+
5760 info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0
+
5761 } else {
+
5762 // otherwise, use defaults, which is all-0
+
5763 info->mr = info->mg = info->mb = info->ma = 0;
+
5764 }
+
5765 return 1;
+
5766 }
+
5767 return 0; // error
+
5768}
+
5769
+ +
5771 int hsz;
+
5772 if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M')
+
5773 return stbi__errpuc("not BMP", "Corrupt BMP");
+
5774 stbi__get32le(s); // discard filesize
+
5775 stbi__get16le(s); // discard reserved
+
5776 stbi__get16le(s); // discard reserved
+
5777 info->offset = stbi__get32le(s);
+
5778 info->hsz = hsz = stbi__get32le(s);
+
5779 info->mr = info->mg = info->mb = info->ma = 0;
+
5780 info->extra_read = 14;
+
5781
+
5782 if (info->offset < 0)
+
5783 return stbi__errpuc("bad BMP", "bad BMP");
+
5784
+
5785 if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124)
+
5786 return stbi__errpuc("unknown BMP", "BMP type not supported: unknown");
+
5787 if (hsz == 12) {
+
5788 s->img_x = stbi__get16le(s);
+
5789 s->img_y = stbi__get16le(s);
+
5790 } else {
+
5791 s->img_x = stbi__get32le(s);
+
5792 s->img_y = stbi__get32le(s);
+
5793 }
+
5794 if (stbi__get16le(s) != 1)
+
5795 return stbi__errpuc("bad BMP", "bad BMP");
+
5796 info->bpp = stbi__get16le(s);
+
5797 if (hsz != 12) {
+
5798 int compress = stbi__get32le(s);
+
5799 if (compress == 1 || compress == 2)
+
5800 return stbi__errpuc("BMP RLE", "BMP type not supported: RLE");
+
5801 if (compress >= 4)
+
5802 return stbi__errpuc("BMP JPEG/PNG",
+
5803 "BMP type not supported: unsupported compression"); // this includes PNG/JPEG modes
+
5804 if (compress == 3 && info->bpp != 16 && info->bpp != 32)
+
5805 return stbi__errpuc("bad BMP", "bad BMP"); // bitfields requires 16 or 32 bits/pixel
+
5806 stbi__get32le(s); // discard sizeof
+
5807 stbi__get32le(s); // discard hres
+
5808 stbi__get32le(s); // discard vres
+
5809 stbi__get32le(s); // discard colorsused
+
5810 stbi__get32le(s); // discard max important
+
5811 if (hsz == 40 || hsz == 56) {
+
5812 if (hsz == 56) {
+ + + + +
5817 }
+
5818 if (info->bpp == 16 || info->bpp == 32) {
+
5819 if (compress == 0) {
+ +
5821 } else if (compress == 3) {
+
5822 info->mr = stbi__get32le(s);
+
5823 info->mg = stbi__get32le(s);
+
5824 info->mb = stbi__get32le(s);
+
5825 info->extra_read += 12;
+
5826 // not documented, but generated by photoshop and handled by mspaint
+
5827 if (info->mr == info->mg && info->mg == info->mb) {
+
5828 // ?!?!?
+
5829 return stbi__errpuc("bad BMP", "bad BMP");
+
5830 }
+
5831 } else
+
5832 return stbi__errpuc("bad BMP", "bad BMP");
+
5833 }
+
5834 } else {
+
5835 // V4/V5 header
+
5836 int i;
+
5837 if (hsz != 108 && hsz != 124)
+
5838 return stbi__errpuc("bad BMP", "bad BMP");
+
5839 info->mr = stbi__get32le(s);
+
5840 info->mg = stbi__get32le(s);
+
5841 info->mb = stbi__get32le(s);
+
5842 info->ma = stbi__get32le(s);
+
5843 if (compress != 3) // override mr/mg/mb unless in BI_BITFIELDS mode, as per docs
+ +
5845 stbi__get32le(s); // discard color space
+
5846 for (i = 0; i < 12; ++i)
+
5847 stbi__get32le(s); // discard color space parameters
+
5848 if (hsz == 124) {
+
5849 stbi__get32le(s); // discard rendering intent
+
5850 stbi__get32le(s); // discard offset of profile data
+
5851 stbi__get32le(s); // discard size of profile data
+
5852 stbi__get32le(s); // discard reserved
+
5853 }
+
5854 }
+
5855 }
+
5856 return (void *)1;
+
5857}
+
5858
+
5859static void * stbi__bmp_load(stbi__context * s, int * x, int * y, int * comp, int req_comp, stbi__result_info * ri) {
+
5860 stbi_uc * out;
+
5861 unsigned int mr = 0, mg = 0, mb = 0, ma = 0, all_a;
+
5862 stbi_uc pal[256][4];
+
5863 int psize = 0, i, j, width;
+ + + +
5867
+
5868 info.all_a = 255;
+ +
5870 return NULL; // error code already set
+
5871
+
5872 flip_vertically = ((int)s->img_y) > 0;
+
5873 s->img_y = abs((int)s->img_y);
+
5874
+
5875 if (s->img_y > STBI_MAX_DIMENSIONS)
+
5876 return stbi__errpuc("too large", "Very large image (corrupt?)");
+
5877 if (s->img_x > STBI_MAX_DIMENSIONS)
+
5878 return stbi__errpuc("too large", "Very large image (corrupt?)");
+
5879
+
5880 mr = info.mr;
+
5881 mg = info.mg;
+
5882 mb = info.mb;
+
5883 ma = info.ma;
+
5884 all_a = info.all_a;
+
5885
+
5886 if (info.hsz == 12) {
+
5887 if (info.bpp < 24)
+
5888 psize = (info.offset - info.extra_read - 24) / 3;
+
5889 } else {
+
5890 if (info.bpp < 16)
+
5891 psize = (info.offset - info.extra_read - info.hsz) >> 2;
+
5892 }
+
5893 if (psize == 0) {
+
5894 // accept some number of extra bytes after the header, but if the offset points either to before
+
5895 // the header ends or implies a large amount of extra data, reject the file as malformed
+
5896 int bytes_read_so_far = s->callback_already_read + (int)(s->img_buffer - s->img_buffer_original);
+
5897 int header_limit = 1024; // max we actually read is below 256 bytes currently.
+
5898 int extra_data_limit = 256 * 4; // what ordinarily goes here is a palette; 256 entries*4 bytes is its max size.
+ +
5900 return stbi__errpuc("bad header", "Corrupt BMP");
+
5901 }
+
5902 // we established that bytes_read_so_far is positive and sensible.
+
5903 // the first half of this test rejects offsets that are either too small positives, or
+
5904 // negative, and guarantees that info.offset >= bytes_read_so_far > 0. this in turn
+
5905 // ensures the number computed in the second half of the test can't overflow.
+
5906 if (info.offset < bytes_read_so_far || info.offset - bytes_read_so_far > extra_data_limit) {
+
5907 return stbi__errpuc("bad offset", "Corrupt BMP");
+
5908 } else {
+ +
5910 }
+
5911 }
+
5912
+
5913 if (info.bpp == 24 && ma == 0xff000000)
+
5914 s->img_n = 3;
+
5915 else
+
5916 s->img_n = ma ? 4 : 3;
+
5917 if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
+
5918 target = req_comp;
+
5919 else
+
5920 target = s->img_n; // if they want monochrome, we'll post-convert
+
5921
+
5922 // sanity-check size
+
5923 if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0))
+
5924 return stbi__errpuc("too large", "Corrupt BMP");
+
5925
+
5926 out = (stbi_uc *)stbi__malloc_mad3(target, s->img_x, s->img_y, 0);
+
5927 if (!out)
+
5928 return stbi__errpuc("outofmem", "Out of memory");
+
5929 if (info.bpp < 16) {
+
5930 int z = 0;
+
5931 if (psize == 0 || psize > 256) {
+
5932 STBI_FREE(out);
+
5933 return stbi__errpuc("invalid", "Corrupt BMP");
+
5934 }
+
5935 for (i = 0; i < psize; ++i) {
+
5936 pal[i][2] = stbi__get8(s);
+
5937 pal[i][1] = stbi__get8(s);
+
5938 pal[i][0] = stbi__get8(s);
+
5939 if (info.hsz != 12)
+
5940 stbi__get8(s);
+
5941 pal[i][3] = 255;
+
5942 }
+
5943 stbi__skip(s, info.offset - info.extra_read - info.hsz - psize * (info.hsz == 12 ? 3 : 4));
+
5944 if (info.bpp == 1)
+
5945 width = (s->img_x + 7) >> 3;
+
5946 else if (info.bpp == 4)
+
5947 width = (s->img_x + 1) >> 1;
+
5948 else if (info.bpp == 8)
+
5949 width = s->img_x;
+
5950 else {
+
5951 STBI_FREE(out);
+
5952 return stbi__errpuc("bad bpp", "Corrupt BMP");
+
5953 }
+
5954 pad = (-width) & 3;
+
5955 if (info.bpp == 1) {
+
5956 for (j = 0; j < (int)s->img_y; ++j) {
+
5957 int bit_offset = 7, v = stbi__get8(s);
+
5958 for (i = 0; i < (int)s->img_x; ++i) {
+
5959 int color = (v >> bit_offset) & 0x1;
+
5960 out[z++] = pal[color][0];
+
5961 out[z++] = pal[color][1];
+
5962 out[z++] = pal[color][2];
+
5963 if (target == 4)
+
5964 out[z++] = 255;
+
5965 if (i + 1 == (int)s->img_x)
+
5966 break;
+
5967 if ((--bit_offset) < 0) {
+
5968 bit_offset = 7;
+
5969 v = stbi__get8(s);
+
5970 }
+
5971 }
+
5972 stbi__skip(s, pad);
+
5973 }
+
5974 } else {
+
5975 for (j = 0; j < (int)s->img_y; ++j) {
+
5976 for (i = 0; i < (int)s->img_x; i += 2) {
+
5977 int v = stbi__get8(s), v2 = 0;
+
5978 if (info.bpp == 4) {
+
5979 v2 = v & 15;
+
5980 v >>= 4;
+
5981 }
+
5982 out[z++] = pal[v][0];
+
5983 out[z++] = pal[v][1];
+
5984 out[z++] = pal[v][2];
+
5985 if (target == 4)
+
5986 out[z++] = 255;
+
5987 if (i + 1 == (int)s->img_x)
+
5988 break;
+
5989 v = (info.bpp == 8) ? stbi__get8(s) : v2;
+
5990 out[z++] = pal[v][0];
+
5991 out[z++] = pal[v][1];
+
5992 out[z++] = pal[v][2];
+
5993 if (target == 4)
+
5994 out[z++] = 255;
+
5995 }
+
5996 stbi__skip(s, pad);
+
5997 }
+
5998 }
+
5999 } else {
+
6000 int rshift = 0, gshift = 0, bshift = 0, ashift = 0, rcount = 0, gcount = 0, bcount = 0, acount = 0;
+
6001 int z = 0;
+
6002 int easy = 0;
+
6003 stbi__skip(s, info.offset - info.extra_read - info.hsz);
+
6004 if (info.bpp == 24)
+
6005 width = 3 * s->img_x;
+
6006 else if (info.bpp == 16)
+
6007 width = 2 * s->img_x;
+
6008 else /* bpp = 32 and pad = 0 */
+
6009 width = 0;
+
6010 pad = (-width) & 3;
+
6011 if (info.bpp == 24) {
+
6012 easy = 1;
+
6013 } else if (info.bpp == 32) {
+
6014 if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000)
+
6015 easy = 2;
+
6016 }
+
6017 if (!easy) {
+
6018 if (!mr || !mg || !mb) {
+
6019 STBI_FREE(out);
+
6020 return stbi__errpuc("bad masks", "Corrupt BMP");
+
6021 }
+
6022 // right shift amt to put high bit in position #7
+
6023 rshift = stbi__high_bit(mr) - 7;
+ +
6025 gshift = stbi__high_bit(mg) - 7;
+ +
6027 bshift = stbi__high_bit(mb) - 7;
+ +
6029 ashift = stbi__high_bit(ma) - 7;
+ +
6031 if (rcount > 8 || gcount > 8 || bcount > 8 || acount > 8) {
+
6032 STBI_FREE(out);
+
6033 return stbi__errpuc("bad masks", "Corrupt BMP");
+
6034 }
+
6035 }
+
6036 for (j = 0; j < (int)s->img_y; ++j) {
+
6037 if (easy) {
+
6038 for (i = 0; i < (int)s->img_x; ++i) {
+
6039 unsigned char a;
+
6040 out[z + 2] = stbi__get8(s);
+
6041 out[z + 1] = stbi__get8(s);
+
6042 out[z + 0] = stbi__get8(s);
+
6043 z += 3;
+
6044 a = (easy == 2 ? stbi__get8(s) : 255);
+
6045 all_a |= a;
+
6046 if (target == 4)
+
6047 out[z++] = a;
+
6048 }
+
6049 } else {
+
6050 int bpp = info.bpp;
+
6051 for (i = 0; i < (int)s->img_x; ++i) {
+ +
6053 unsigned int a;
+ + + +
6057 a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255);
+
6058 all_a |= a;
+
6059 if (target == 4)
+
6060 out[z++] = STBI__BYTECAST(a);
+
6061 }
+
6062 }
+
6063 stbi__skip(s, pad);
+
6064 }
+
6065 }
+
6066
+
6067 // if alpha channel is all 0s, replace with all 255s
+
6068 if (target == 4 && all_a == 0)
+
6069 for (i = 4 * s->img_x * s->img_y - 1; i >= 0; i -= 4)
+
6070 out[i] = 255;
+
6071
+
6072 if (flip_vertically) {
+
6073 stbi_uc t;
+
6074 for (j = 0; j < (int)s->img_y >> 1; ++j) {
+
6075 stbi_uc * p1 = out + j * s->img_x * target;
+
6076 stbi_uc * p2 = out + (s->img_y - 1 - j) * s->img_x * target;
+
6077 for (i = 0; i < (int)s->img_x * target; ++i) {
+
6078 t = p1[i];
+
6079 p1[i] = p2[i];
+
6080 p2[i] = t;
+
6081 }
+
6082 }
+
6083 }
+
6084
+
6085 if (req_comp && req_comp != target) {
+
6086 out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y);
+
6087 if (out == NULL)
+
6088 return out; // stbi__convert_format frees input on failure
+
6089 }
+
6090
+
6091 *x = s->img_x;
+
6092 *y = s->img_y;
+
6093 if (comp)
+
6094 *comp = s->img_n;
+
6095 return out;
+
6096}
+
6097#endif
+
6098
+
6099// Targa Truevision - TGA
+
6100// by Jonathan Dummer
+
6101#ifndef STBI_NO_TGA
+
6102// returns STBI_rgb or whatever, 0 on error
+
6103static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int * is_rgb16) {
+
6104 // only RGB or RGBA (incl. 16bit) or grey allowed
+
6105 if (is_rgb16)
+
6106 *is_rgb16 = 0;
+
6107 switch (bits_per_pixel) {
+
6108 case 8:
+
6109 return STBI_grey;
+
6110 case 16:
+
6111 if (is_grey)
+
6112 return STBI_grey_alpha;
+
6113 // fallthrough
+
6114 case 15:
+
6115 if (is_rgb16)
+
6116 *is_rgb16 = 1;
+
6117 return STBI_rgb;
+
6118 case 24: // fallthrough
+
6119 case 32:
+
6120 return bits_per_pixel / 8;
+
6121 default:
+
6122 return 0;
+
6123 }
+
6124}
+
6125
+
6126static int stbi__tga_info(stbi__context * s, int * x, int * y, int * comp) {
+ +
6128 int sz, tga_colormap_type;
+
6129 stbi__get8(s); // discard Offset
+
6130 tga_colormap_type = stbi__get8(s); // colormap type
+
6131 if (tga_colormap_type > 1) {
+
6132 stbi__rewind(s);
+
6133 return 0; // only RGB or indexed allowed
+
6134 }
+
6135 tga_image_type = stbi__get8(s); // image type
+
6136 if (tga_colormap_type == 1) { // colormapped (paletted) image
+
6137 if (tga_image_type != 1 && tga_image_type != 9) {
+
6138 stbi__rewind(s);
+
6139 return 0;
+
6140 }
+
6141 stbi__skip(s, 4); // skip index of first colormap entry and number of entries
+
6142 sz = stbi__get8(s); // check bits per palette color entry
+
6143 if ((sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32)) {
+
6144 stbi__rewind(s);
+
6145 return 0;
+
6146 }
+
6147 stbi__skip(s, 4); // skip image x and y origin
+ +
6149 } else { // "normal" image w/o colormap - only RGB or grey allowed, +/- RLE
+
6150 if ((tga_image_type != 2) && (tga_image_type != 3) && (tga_image_type != 10) && (tga_image_type != 11)) {
+
6151 stbi__rewind(s);
+
6152 return 0; // only RGB or grey allowed, +/- RLE
+
6153 }
+
6154 stbi__skip(s, 9); // skip colormap specification and image x/y origin
+
6155 tga_colormap_bpp = 0;
+
6156 }
+ +
6158 if (tga_w < 1) {
+
6159 stbi__rewind(s);
+
6160 return 0; // test width
+
6161 }
+ +
6163 if (tga_h < 1) {
+
6164 stbi__rewind(s);
+
6165 return 0; // test height
+
6166 }
+
6167 tga_bits_per_pixel = stbi__get8(s); // bits per pixel
+
6168 stbi__get8(s); // ignore alpha bits
+
6169 if (tga_colormap_bpp != 0) {
+
6170 if ((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16)) {
+
6171 // when using a colormap, tga_bits_per_pixel is the size of the indexes
+
6172 // I don't think anything but 8 or 16bit indexes makes sense
+
6173 stbi__rewind(s);
+
6174 return 0;
+
6175 }
+ +
6177 } else {
+ +
6179 }
+
6180 if (!tga_comp) {
+
6181 stbi__rewind(s);
+
6182 return 0;
+
6183 }
+
6184 if (x)
+
6185 *x = tga_w;
+
6186 if (y)
+
6187 *y = tga_h;
+
6188 if (comp)
+
6189 *comp = tga_comp;
+
6190 return 1; // seems to have passed everything
+
6191}
+
6192
+
6193static int stbi__tga_test(stbi__context * s) {
+
6194 int res = 0;
+
6195 int sz, tga_color_type;
+
6196 stbi__get8(s); // discard Offset
+
6197 tga_color_type = stbi__get8(s); // color type
+
6198 if (tga_color_type > 1)
+
6199 goto errorEnd; // only RGB or indexed allowed
+
6200 sz = stbi__get8(s); // image type
+
6201 if (tga_color_type == 1) { // colormapped (paletted) image
+
6202 if (sz != 1 && sz != 9)
+
6203 goto errorEnd; // colortype 1 demands image type 1 or 9
+
6204 stbi__skip(s, 4); // skip index of first colormap entry and number of entries
+
6205 sz = stbi__get8(s); // check bits per palette color entry
+
6206 if ((sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32))
+
6207 goto errorEnd;
+
6208 stbi__skip(s, 4); // skip image x and y origin
+
6209 } else { // "normal" image w/o colormap
+
6210 if ((sz != 2) && (sz != 3) && (sz != 10) && (sz != 11))
+
6211 goto errorEnd; // only RGB or grey allowed, +/- RLE
+
6212 stbi__skip(s, 9); // skip colormap specification and image x/y origin
+
6213 }
+
6214 if (stbi__get16le(s) < 1)
+
6215 goto errorEnd; // test width
+
6216 if (stbi__get16le(s) < 1)
+
6217 goto errorEnd; // test height
+
6218 sz = stbi__get8(s); // bits per pixel
+
6219 if ((tga_color_type == 1) && (sz != 8) && (sz != 16))
+
6220 goto errorEnd; // for colormapped images, bpp is size of an index
+
6221 if ((sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32))
+
6222 goto errorEnd;
+
6223
+
6224 res = 1; // if we got this far, everything's good and we can return 1 instead of 0
+
6225
+
6226errorEnd:
+
6227 stbi__rewind(s);
+
6228 return res;
+
6229}
+
6230
+
6231// read 16bit value and convert to 24bit RGB
+ + + +
6235 // we have 3 channels with 5bits each
+
6236 int r = (px >> 10) & fiveBitMask;
+
6237 int g = (px >> 5) & fiveBitMask;
+
6238 int b = px & fiveBitMask;
+
6239 // Note that this saves the data in RGB(A) order, so it doesn't need to be swapped later
+
6240 out[0] = (stbi_uc)((r * 255) / 31);
+
6241 out[1] = (stbi_uc)((g * 255) / 31);
+
6242 out[2] = (stbi_uc)((b * 255) / 31);
+
6243
+
6244 // some people claim that the most significant bit might be used for alpha
+
6245 // (possibly if an alpha-bit is set in the "image descriptor byte")
+
6246 // but that only made 16bit test images completely translucent..
+
6247 // so let's treat all 15 and 16bit TGAs as RGB with no alpha.
+
6248}
+
6249
+
6250static void * stbi__tga_load(stbi__context * s, int * x, int * y, int * comp, int req_comp, stbi__result_info * ri) {
+
6251 // read in the TGA header stuff
+
6252 int tga_offset = stbi__get8(s);
+
6253 int tga_indexed = stbi__get8(s);
+ +
6255 int tga_is_RLE = 0;
+ + + + + +
6261 int tga_width = stbi__get16le(s);
+
6262 int tga_height = stbi__get16le(s);
+ +
6264 int tga_comp, tga_rgb16 = 0;
+
6265 int tga_inverted = stbi__get8(s);
+
6266 // int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused (useless?)
+
6267 // image data
+
6268 unsigned char * tga_data;
+
6269 unsigned char * tga_palette = NULL;
+
6270 int i, j;
+
6271 unsigned char raw_data[4] = {0};
+
6272 int RLE_count = 0;
+
6273 int RLE_repeating = 0;
+
6274 int read_next_pixel = 1;
+ +
6276 STBI_NOTUSED(tga_x_origin); // @TODO
+
6277 STBI_NOTUSED(tga_y_origin); // @TODO
+
6278
+ +
6280 return stbi__errpuc("too large", "Very large image (corrupt?)");
+ +
6282 return stbi__errpuc("too large", "Very large image (corrupt?)");
+
6283
+
6284 // do a tiny bit of precessing
+
6285 if (tga_image_type >= 8) {
+
6286 tga_image_type -= 8;
+
6287 tga_is_RLE = 1;
+
6288 }
+
6289 tga_inverted = 1 - ((tga_inverted >> 5) & 1);
+
6290
+
6291 // If I'm paletted, then I'll use the number of bits from the palette
+
6292 if (tga_indexed)
+ +
6294 else
+ +
6296
+
6297 if (!tga_comp) // shouldn't really happen, stbi__tga_test() should have ensured basic consistency
+
6298 return stbi__errpuc("bad format", "Can't find out TGA pixelformat");
+
6299
+
6300 // tga info
+
6301 *x = tga_width;
+
6302 *y = tga_height;
+
6303 if (comp)
+
6304 *comp = tga_comp;
+
6305
+ +
6307 return stbi__errpuc("too large", "Corrupt TGA");
+
6308
+
6309 tga_data = (unsigned char *)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0);
+
6310 if (!tga_data)
+
6311 return stbi__errpuc("outofmem", "Out of memory");
+
6312
+
6313 // skip to the data's starting position (offset usually = 0)
+ +
6315
+
6316 if (!tga_indexed && !tga_is_RLE && !tga_rgb16) {
+
6317 for (i = 0; i < tga_height; ++i) {
+
6318 int row = tga_inverted ? tga_height - i - 1 : i;
+ + +
6321 }
+
6322 } else {
+
6323 // do I need to load a palette?
+
6324 if (tga_indexed) {
+
6325 if (tga_palette_len == 0) { /* you have to have at least one entry! */
+ +
6327 return stbi__errpuc("bad palette", "Corrupt TGA");
+
6328 }
+
6329
+
6330 // any data to skip? (offset usually = 0)
+ +
6332 // load the palette
+
6333 tga_palette = (unsigned char *)stbi__malloc_mad2(tga_palette_len, tga_comp, 0);
+
6334 if (!tga_palette) {
+ +
6336 return stbi__errpuc("outofmem", "Out of memory");
+
6337 }
+
6338 if (tga_rgb16) {
+ +
6340 STBI_ASSERT(tga_comp == STBI_rgb);
+
6341 for (i = 0; i < tga_palette_len; ++i) {
+ + +
6344 }
+
6345 } else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) {
+ + +
6348 return stbi__errpuc("bad palette", "Corrupt TGA");
+
6349 }
+
6350 }
+
6351 // load the data
+
6352 for (i = 0; i < tga_width * tga_height; ++i) {
+
6353 // if I'm in RLE mode, do I need to get a RLE stbi__pngchunk?
+
6354 if (tga_is_RLE) {
+
6355 if (RLE_count == 0) {
+
6356 // yep, get the next byte as a RLE command
+
6357 int RLE_cmd = stbi__get8(s);
+
6358 RLE_count = 1 + (RLE_cmd & 127);
+
6359 RLE_repeating = RLE_cmd >> 7;
+
6360 read_next_pixel = 1;
+
6361 } else if (!RLE_repeating) {
+
6362 read_next_pixel = 1;
+
6363 }
+
6364 } else {
+
6365 read_next_pixel = 1;
+
6366 }
+
6367 // OK, if I need to read a pixel, do it now
+
6368 if (read_next_pixel) {
+
6369 // load however much data we did have
+
6370 if (tga_indexed) {
+
6371 // read in index, then perform the lookup
+ +
6373 if (pal_idx >= tga_palette_len) {
+
6374 // invalid index
+
6375 pal_idx = 0;
+
6376 }
+
6377 pal_idx *= tga_comp;
+
6378 for (j = 0; j < tga_comp; ++j) {
+ +
6380 }
+
6381 } else if (tga_rgb16) {
+
6382 STBI_ASSERT(tga_comp == STBI_rgb);
+ +
6384 } else {
+
6385 // read in the data raw
+
6386 for (j = 0; j < tga_comp; ++j) {
+
6387 raw_data[j] = stbi__get8(s);
+
6388 }
+
6389 }
+
6390 // clear the reading flag for the next pixel
+
6391 read_next_pixel = 0;
+
6392 } // end of reading a pixel
+
6393
+
6394 // copy data
+
6395 for (j = 0; j < tga_comp; ++j)
+
6396 tga_data[i * tga_comp + j] = raw_data[j];
+
6397
+
6398 // in case we're in RLE mode, keep counting down
+
6399 --RLE_count;
+
6400 }
+
6401 // do I need to invert the image?
+
6402 if (tga_inverted) {
+
6403 for (j = 0; j * 2 < tga_height; ++j) {
+
6404 int index1 = j * tga_width * tga_comp;
+
6405 int index2 = (tga_height - 1 - j) * tga_width * tga_comp;
+
6406 for (i = tga_width * tga_comp; i > 0; --i) {
+
6407 unsigned char temp = tga_data[index1];
+ +
6409 tga_data[index2] = temp;
+
6410 ++index1;
+
6411 ++index2;
+
6412 }
+
6413 }
+
6414 }
+
6415 // clear my palette, if I had one
+
6416 if (tga_palette != NULL) {
+ +
6418 }
+
6419 }
+
6420
+
6421 // swap RGB - if the source data was RGB16, it already is in the right order
+
6422 if (tga_comp >= 3 && !tga_rgb16) {
+
6423 unsigned char * tga_pixel = tga_data;
+
6424 for (i = 0; i < tga_width * tga_height; ++i) {
+
6425 unsigned char temp = tga_pixel[0];
+
6426 tga_pixel[0] = tga_pixel[2];
+
6427 tga_pixel[2] = temp;
+ +
6429 }
+
6430 }
+
6431
+
6432 // convert to target component count
+
6433 if (req_comp && req_comp != tga_comp)
+ +
6435
+
6436 // the things I do to get rid of an error message, and yet keep
+
6437 // Microsoft's C compilers happy... [8^(
+ + +
6440 // OK, done
+
6441 return tga_data;
+
6442}
+
6443#endif
+
6444
+
6445// *************************************************************************************************
+
6446// Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB
+
6447
+
6448#ifndef STBI_NO_PSD
+
6449static int stbi__psd_test(stbi__context * s) {
+
6450 int r = (stbi__get32be(s) == 0x38425053);
+
6451 stbi__rewind(s);
+
6452 return r;
+
6453}
+
6454
+
6455static int stbi__psd_decode_rle(stbi__context * s, stbi_uc * p, int pixelCount) {
+
6456 int count, nleft, len;
+
6457
+
6458 count = 0;
+
6459 while ((nleft = pixelCount - count) > 0) {
+
6460 len = stbi__get8(s);
+
6461 if (len == 128) {
+
6462 // No-op.
+
6463 } else if (len < 128) {
+
6464 // Copy next len+1 bytes literally.
+
6465 len++;
+
6466 if (len > nleft)
+
6467 return 0; // corrupt data
+
6468 count += len;
+
6469 while (len) {
+
6470 *p = stbi__get8(s);
+
6471 p += 4;
+
6472 len--;
+
6473 }
+
6474 } else if (len > 128) {
+
6475 stbi_uc val;
+
6476 // Next -len+1 bytes in the dest are replicated from next source byte.
+
6477 // (Interpret len as a negative 8-bit int.)
+
6478 len = 257 - len;
+
6479 if (len > nleft)
+
6480 return 0; // corrupt data
+
6481 val = stbi__get8(s);
+
6482 count += len;
+
6483 while (len) {
+
6484 *p = val;
+
6485 p += 4;
+
6486 len--;
+
6487 }
+
6488 }
+
6489 }
+
6490
+
6491 return 1;
+
6492}
+
6493
+
6494static void * stbi__psd_load(stbi__context * s, int * x, int * y, int * comp, int req_comp, stbi__result_info * ri, int bpc) {
+
6495 int pixelCount;
+ +
6497 int channel, i;
+
6498 int bitdepth;
+
6499 int w, h;
+
6500 stbi_uc * out;
+ +
6502
+
6503 // Check identifier
+
6504 if (stbi__get32be(s) != 0x38425053) // "8BPS"
+
6505 return stbi__errpuc("not PSD", "Corrupt PSD image");
+
6506
+
6507 // Check file type version.
+
6508 if (stbi__get16be(s) != 1)
+
6509 return stbi__errpuc("wrong version", "Unsupported version of PSD image");
+
6510
+
6511 // Skip 6 reserved bytes.
+
6512 stbi__skip(s, 6);
+
6513
+
6514 // Read the number of channels (R, G, B, A, etc).
+ + +
6517 return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image");
+
6518
+
6519 // Read the rows and columns of the image.
+
6520 h = stbi__get32be(s);
+
6521 w = stbi__get32be(s);
+
6522
+
6523 if (h > STBI_MAX_DIMENSIONS)
+
6524 return stbi__errpuc("too large", "Very large image (corrupt?)");
+
6525 if (w > STBI_MAX_DIMENSIONS)
+
6526 return stbi__errpuc("too large", "Very large image (corrupt?)");
+
6527
+
6528 // Make sure the depth is 8 bits.
+ +
6530 if (bitdepth != 8 && bitdepth != 16)
+
6531 return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit");
+
6532
+
6533 // Make sure the color mode is RGB.
+
6534 // Valid options are:
+
6535 // 0: Bitmap
+
6536 // 1: Grayscale
+
6537 // 2: Indexed color
+
6538 // 3: RGB color
+
6539 // 4: CMYK color
+
6540 // 7: Multichannel
+
6541 // 8: Duotone
+
6542 // 9: Lab color
+
6543 if (stbi__get16be(s) != 3)
+
6544 return stbi__errpuc("wrong color format", "PSD is not in RGB color format");
+
6545
+
6546 // Skip the Mode Data. (It's the palette for indexed color; other info for other modes.)
+ +
6548
+
6549 // Skip the image resources. (resolution, pen tool paths, etc)
+ +
6551
+
6552 // Skip the reserved data.
+ +
6554
+
6555 // Find out if the data is compressed.
+
6556 // Known values:
+
6557 // 0: no compression
+
6558 // 1: RLE compressed
+ +
6560 if (compression > 1)
+
6561 return stbi__errpuc("bad compression", "PSD has an unknown compression format");
+
6562
+
6563 // Check size
+
6564 if (!stbi__mad3sizes_valid(4, w, h, 0))
+
6565 return stbi__errpuc("too large", "Corrupt PSD");
+
6566
+
6567 // Create the destination image.
+
6568
+
6569 if (!compression && bitdepth == 16 && bpc == 16) {
+
6570 out = (stbi_uc *)stbi__malloc_mad3(8, w, h, 0);
+
6571 ri->bits_per_channel = 16;
+
6572 } else
+
6573 out = (stbi_uc *)stbi__malloc(4 * w * h);
+
6574
+
6575 if (!out)
+
6576 return stbi__errpuc("outofmem", "Out of memory");
+
6577 pixelCount = w * h;
+
6578
+
6579 // Initialize the data to zero.
+
6580 // memset( out, 0, pixelCount * 4 );
+
6581
+
6582 // Finally, the image data.
+
6583 if (compression) {
+
6584 // RLE as used by .PSD and .TIFF
+
6585 // Loop until you get the number of unpacked bytes you are expecting:
+
6586 // Read the next source byte into n.
+
6587 // If n is between 0 and 127 inclusive, copy the next n+1 bytes literally.
+
6588 // Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times.
+
6589 // Else if n is 128, noop.
+
6590 // Endloop
+
6591
+
6592 // The RLE-compressed data is preceded by a 2-byte data count for each row in the data,
+
6593 // which we're going to just skip.
+
6594 stbi__skip(s, h * channelCount * 2);
+
6595
+
6596 // Read the RLE data by channel.
+
6597 for (channel = 0; channel < 4; channel++) {
+
6598 stbi_uc * p;
+
6599
+
6600 p = out + channel;
+
6601 if (channel >= channelCount) {
+
6602 // Fill this channel with default data.
+
6603 for (i = 0; i < pixelCount; i++, p += 4)
+
6604 *p = (channel == 3 ? 255 : 0);
+
6605 } else {
+
6606 // Read the RLE data.
+
6607 if (!stbi__psd_decode_rle(s, p, pixelCount)) {
+
6608 STBI_FREE(out);
+
6609 return stbi__errpuc("corrupt", "bad RLE data");
+
6610 }
+
6611 }
+
6612 }
+
6613 } else {
+
6614 // We're at the raw image data. It's each channel in order (Red, Green, Blue, Alpha, ...)
+
6615 // where each channel consists of an 8-bit (or 16-bit) value for each pixel in the image.
+
6616
+
6617 // Read the data by channel.
+
6618 for (channel = 0; channel < 4; channel++) {
+
6619 if (channel >= channelCount) {
+
6620 // Fill this channel with default data.
+
6621 if (bitdepth == 16 && bpc == 16) {
+
6622 stbi__uint16 * q = ((stbi__uint16 *)out) + channel;
+
6623 stbi__uint16 val = channel == 3 ? 65535 : 0;
+
6624 for (i = 0; i < pixelCount; i++, q += 4)
+
6625 *q = val;
+
6626 } else {
+
6627 stbi_uc * p = out + channel;
+
6628 stbi_uc val = channel == 3 ? 255 : 0;
+
6629 for (i = 0; i < pixelCount; i++, p += 4)
+
6630 *p = val;
+
6631 }
+
6632 } else {
+
6633 if (ri->bits_per_channel == 16) { // output bpc
+
6634 stbi__uint16 * q = ((stbi__uint16 *)out) + channel;
+
6635 for (i = 0; i < pixelCount; i++, q += 4)
+ +
6637 } else {
+
6638 stbi_uc * p = out + channel;
+
6639 if (bitdepth == 16) { // input bpc
+
6640 for (i = 0; i < pixelCount; i++, p += 4)
+
6641 *p = (stbi_uc)(stbi__get16be(s) >> 8);
+
6642 } else {
+
6643 for (i = 0; i < pixelCount; i++, p += 4)
+
6644 *p = stbi__get8(s);
+
6645 }
+
6646 }
+
6647 }
+
6648 }
+
6649 }
+
6650
+
6651 // remove weird white matte from PSD
+
6652 if (channelCount >= 4) {
+
6653 if (ri->bits_per_channel == 16) {
+
6654 for (i = 0; i < w * h; ++i) {
+
6655 stbi__uint16 * pixel = (stbi__uint16 *)out + 4 * i;
+
6656 if (pixel[3] != 0 && pixel[3] != 65535) {
+
6657 float a = pixel[3] / 65535.0f;
+
6658 float ra = 1.0f / a;
+
6659 float inv_a = 65535.0f * (1 - ra);
+
6660 pixel[0] = (stbi__uint16)(pixel[0] * ra + inv_a);
+
6661 pixel[1] = (stbi__uint16)(pixel[1] * ra + inv_a);
+
6662 pixel[2] = (stbi__uint16)(pixel[2] * ra + inv_a);
+
6663 }
+
6664 }
+
6665 } else {
+
6666 for (i = 0; i < w * h; ++i) {
+
6667 unsigned char * pixel = out + 4 * i;
+
6668 if (pixel[3] != 0 && pixel[3] != 255) {
+
6669 float a = pixel[3] / 255.0f;
+
6670 float ra = 1.0f / a;
+
6671 float inv_a = 255.0f * (1 - ra);
+
6672 pixel[0] = (unsigned char)(pixel[0] * ra + inv_a);
+
6673 pixel[1] = (unsigned char)(pixel[1] * ra + inv_a);
+
6674 pixel[2] = (unsigned char)(pixel[2] * ra + inv_a);
+
6675 }
+
6676 }
+
6677 }
+
6678 }
+
6679
+
6680 // convert to desired output format
+
6681 if (req_comp && req_comp != 4) {
+
6682 if (ri->bits_per_channel == 16)
+ +
6684 else
+ +
6686 if (out == NULL)
+
6687 return out; // stbi__convert_format frees input on failure
+
6688 }
+
6689
+
6690 if (comp)
+
6691 *comp = 4;
+
6692 *y = h;
+
6693 *x = w;
+
6694
+
6695 return out;
+
6696}
+
6697#endif
+
6698
+
6699// *************************************************************************************************
+
6700// Softimage PIC loader
+
6701// by Tom Seddon
+
6702//
+
6703// See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format
+
6704// See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/
+
6705
+
6706#ifndef STBI_NO_PIC
+
6707static int stbi__pic_is4(stbi__context * s, const char * str) {
+
6708 int i;
+
6709 for (i = 0; i < 4; ++i)
+
6710 if (stbi__get8(s) != (stbi_uc)str[i])
+
6711 return 0;
+
6712
+
6713 return 1;
+
6714}
+
6715
+
6716static int stbi__pic_test_core(stbi__context * s) {
+
6717 int i;
+
6718
+
6719 if (!stbi__pic_is4(s, "\x53\x80\xF6\x34"))
+
6720 return 0;
+
6721
+
6722 for (i = 0; i < 84; ++i)
+
6723 stbi__get8(s);
+
6724
+
6725 if (!stbi__pic_is4(s, "PICT"))
+
6726 return 0;
+
6727
+
6728 return 1;
+
6729}
+
6730
+
6731typedef struct {
+
6732 stbi_uc size, type, channel;
+ +
6734
+ +
6736 int mask = 0x80, i;
+
6737
+
6738 for (i = 0; i < 4; ++i, mask >>= 1) {
+
6739 if (channel & mask) {
+
6740 if (stbi__at_eof(s))
+
6741 return stbi__errpuc("bad file", "PIC file too short");
+
6742 dest[i] = stbi__get8(s);
+
6743 }
+
6744 }
+
6745
+
6746 return dest;
+
6747}
+
6748
+
6749static void stbi__copyval(int channel, stbi_uc * dest, const stbi_uc * src) {
+
6750 int mask = 0x80, i;
+
6751
+
6752 for (i = 0; i < 4; ++i, mask >>= 1)
+
6753 if (channel & mask)
+
6754 dest[i] = src[i];
+
6755}
+
6756
+
6757static stbi_uc * stbi__pic_load_core(stbi__context * s, int width, int height, int * comp, stbi_uc * result) {
+
6758 int act_comp = 0, num_packets = 0, y, chained;
+ +
6760
+
6761 // this will (should...) cater for even some bizarre stuff like having data
+
6762 // for the same channel in multiple packets.
+
6763 do {
+ +
6765
+
6766 if (num_packets == sizeof(packets) / sizeof(packets[0]))
+
6767 return stbi__errpuc("bad format", "too many packets");
+
6768
+ +
6770
+
6771 chained = stbi__get8(s);
+
6772 packet->size = stbi__get8(s);
+
6773 packet->type = stbi__get8(s);
+
6774 packet->channel = stbi__get8(s);
+
6775
+
6776 act_comp |= packet->channel;
+
6777
+
6778 if (stbi__at_eof(s))
+
6779 return stbi__errpuc("bad file", "file too short (reading packets)");
+
6780 if (packet->size != 8)
+
6781 return stbi__errpuc("bad format", "packet isn't 8bpp");
+
6782 } while (chained);
+
6783
+
6784 *comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel?
+
6785
+
6786 for (y = 0; y < height; ++y) {
+
6787 int packet_idx;
+
6788
+ + +
6791 stbi_uc * dest = result + y * width * 4;
+
6792
+
6793 switch (packet->type) {
+
6794 default:
+
6795 return stbi__errpuc("bad format", "packet has bad compression type");
+
6796
+
6797 case 0: { // uncompressed
+
6798 int x;
+
6799
+
6800 for (x = 0; x < width; ++x, dest += 4)
+
6801 if (!stbi__readval(s, packet->channel, dest))
+
6802 return 0;
+
6803 break;
+
6804 }
+
6805
+
6806 case 1: // Pure RLE
+
6807 {
+
6808 int left = width, i;
+
6809
+
6810 while (left > 0) {
+
6811 stbi_uc count, value[4];
+
6812
+
6813 count = stbi__get8(s);
+
6814 if (stbi__at_eof(s))
+
6815 return stbi__errpuc("bad file", "file too short (pure read count)");
+
6816
+
6817 if (count > left)
+
6818 count = (stbi_uc)left;
+
6819
+
6820 if (!stbi__readval(s, packet->channel, value))
+
6821 return 0;
+
6822
+
6823 for (i = 0; i < count; ++i, dest += 4)
+
6824 stbi__copyval(packet->channel, dest, value);
+
6825 left -= count;
+
6826 }
+
6827 } break;
+
6828
+
6829 case 2: { // Mixed RLE
+
6830 int left = width;
+
6831 while (left > 0) {
+
6832 int count = stbi__get8(s), i;
+
6833 if (stbi__at_eof(s))
+
6834 return stbi__errpuc("bad file", "file too short (mixed read count)");
+
6835
+
6836 if (count >= 128) { // Repeated
+
6837 stbi_uc value[4];
+
6838
+
6839 if (count == 128)
+ +
6841 else
+
6842 count -= 127;
+
6843 if (count > left)
+
6844 return stbi__errpuc("bad file", "scanline overrun");
+
6845
+
6846 if (!stbi__readval(s, packet->channel, value))
+
6847 return 0;
+
6848
+
6849 for (i = 0; i < count; ++i, dest += 4)
+
6850 stbi__copyval(packet->channel, dest, value);
+
6851 } else { // Raw
+
6852 ++count;
+
6853 if (count > left)
+
6854 return stbi__errpuc("bad file", "scanline overrun");
+
6855
+
6856 for (i = 0; i < count; ++i, dest += 4)
+
6857 if (!stbi__readval(s, packet->channel, dest))
+
6858 return 0;
+
6859 }
+
6860 left -= count;
+
6861 }
+
6862 break;
+
6863 }
+
6864 }
+
6865 }
+
6866 }
+
6867
+
6868 return result;
+
6869}
+
6870
+
6871static void * stbi__pic_load(stbi__context * s, int * px, int * py, int * comp, int req_comp, stbi__result_info * ri) {
+
6872 stbi_uc * result;
+
6873 int i, x, y, internal_comp;
+ +
6875
+
6876 if (!comp)
+ +
6878
+
6879 for (i = 0; i < 92; ++i)
+
6880 stbi__get8(s);
+
6881
+
6882 x = stbi__get16be(s);
+
6883 y = stbi__get16be(s);
+
6884
+
6885 if (y > STBI_MAX_DIMENSIONS)
+
6886 return stbi__errpuc("too large", "Very large image (corrupt?)");
+
6887 if (x > STBI_MAX_DIMENSIONS)
+
6888 return stbi__errpuc("too large", "Very large image (corrupt?)");
+
6889
+
6890 if (stbi__at_eof(s))
+
6891 return stbi__errpuc("bad file", "file too short (pic header)");
+
6892 if (!stbi__mad3sizes_valid(x, y, 4, 0))
+
6893 return stbi__errpuc("too large", "PIC image too large to decode");
+
6894
+
6895 stbi__get32be(s); // skip `ratio'
+
6896 stbi__get16be(s); // skip `fields'
+
6897 stbi__get16be(s); // skip `pad'
+
6898
+
6899 // intermediate buffer is RGBA
+
6900 result = (stbi_uc *)stbi__malloc_mad3(x, y, 4, 0);
+
6901 if (!result)
+
6902 return stbi__errpuc("outofmem", "Out of memory");
+
6903 memset(result, 0xff, x * y * 4);
+
6904
+
6905 if (!stbi__pic_load_core(s, x, y, comp, result)) {
+ +
6907 result = 0;
+
6908 }
+
6909 *px = x;
+
6910 *py = y;
+
6911 if (req_comp == 0)
+
6912 req_comp = *comp;
+ +
6914
+
6915 return result;
+
6916}
+
6917
+
6918static int stbi__pic_test(stbi__context * s) {
+
6919 int r = stbi__pic_test_core(s);
+
6920 stbi__rewind(s);
+
6921 return r;
+
6922}
+
6923#endif
+
6924
+
6925// *************************************************************************************************
+
6926// GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb
+
6927
+
6928#ifndef STBI_NO_GIF
+
6929typedef struct {
+ +
6931 stbi_uc first;
+ + +
6934
+
6935typedef struct {
+
6936 int w, h;
+
6937 stbi_uc * out; // output buffer (always 4 components)
+
6938 stbi_uc * background; // The current "background" as far as a gif is concerned
+
6939 stbi_uc * history;
+ +
6941 stbi_uc pal[256][4];
+
6942 stbi_uc lpal[256][4];
+
6943 stbi__gif_lzw codes[8192];
+ +
6945 int parse, step;
+
6946 int lflags;
+
6947 int start_x, start_y;
+
6948 int max_x, max_y;
+
6949 int cur_x, cur_y;
+
6950 int line_size;
+
6951 int delay;
+
6952} stbi__gif;
+
6953
+
6954static int stbi__gif_test_raw(stbi__context * s) {
+
6955 int sz;
+
6956 if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8')
+
6957 return 0;
+
6958 sz = stbi__get8(s);
+
6959 if (sz != '9' && sz != '7')
+
6960 return 0;
+
6961 if (stbi__get8(s) != 'a')
+
6962 return 0;
+
6963 return 1;
+
6964}
+
6965
+
6966static int stbi__gif_test(stbi__context * s) {
+
6967 int r = stbi__gif_test_raw(s);
+
6968 stbi__rewind(s);
+
6969 return r;
+
6970}
+
6971
+
6972static void stbi__gif_parse_colortable(stbi__context * s, stbi_uc pal[256][4], int num_entries, int transp) {
+
6973 int i;
+
6974 for (i = 0; i < num_entries; ++i) {
+
6975 pal[i][2] = stbi__get8(s);
+
6976 pal[i][1] = stbi__get8(s);
+
6977 pal[i][0] = stbi__get8(s);
+
6978 pal[i][3] = transp == i ? 0 : 255;
+
6979 }
+
6980}
+
6981
+
6982static int stbi__gif_header(stbi__context * s, stbi__gif * g, int * comp, int is_info) {
+ +
6984 if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8')
+
6985 return stbi__err("not GIF", "Corrupt GIF");
+
6986
+
6987 version = stbi__get8(s);
+
6988 if (version != '7' && version != '9')
+
6989 return stbi__err("not GIF", "Corrupt GIF");
+
6990 if (stbi__get8(s) != 'a')
+
6991 return stbi__err("not GIF", "Corrupt GIF");
+
6992
+ +
6994 g->w = stbi__get16le(s);
+
6995 g->h = stbi__get16le(s);
+
6996 g->flags = stbi__get8(s);
+
6997 g->bgindex = stbi__get8(s);
+
6998 g->ratio = stbi__get8(s);
+
6999 g->transparent = -1;
+
7000
+
7001 if (g->w > STBI_MAX_DIMENSIONS)
+
7002 return stbi__err("too large", "Very large image (corrupt?)");
+
7003 if (g->h > STBI_MAX_DIMENSIONS)
+
7004 return stbi__err("too large", "Very large image (corrupt?)");
+
7005
+
7006 if (comp != 0)
+
7007 *comp = 4; // can't actually tell whether it's 3 or 4 until we parse the comments
+
7008
+
7009 if (is_info)
+
7010 return 1;
+
7011
+
7012 if (g->flags & 0x80)
+
7013 stbi__gif_parse_colortable(s, g->pal, 2 << (g->flags & 7), -1);
+
7014
+
7015 return 1;
+
7016}
+
7017
+
7018static int stbi__gif_info_raw(stbi__context * s, int * x, int * y, int * comp) {
+
7019 stbi__gif * g = (stbi__gif *)stbi__malloc(sizeof(stbi__gif));
+
7020 if (!g)
+
7021 return stbi__err("outofmem", "Out of memory");
+
7022 if (!stbi__gif_header(s, g, comp, 1)) {
+
7023 STBI_FREE(g);
+
7024 stbi__rewind(s);
+
7025 return 0;
+
7026 }
+
7027 if (x)
+
7028 *x = g->w;
+
7029 if (y)
+
7030 *y = g->h;
+
7031 STBI_FREE(g);
+
7032 return 1;
+
7033}
+
7034
+ +
7036 stbi_uc *p, *c;
+
7037 int idx;
+
7038
+
7039 // recurse to decode the prefixes, since the linked-list is backwards,
+
7040 // and working backwards through an interleaved image would be nasty
+
7041 if (g->codes[code].prefix >= 0)
+
7042 stbi__out_gif_code(g, g->codes[code].prefix);
+
7043
+
7044 if (g->cur_y >= g->max_y)
+
7045 return;
+
7046
+
7047 idx = g->cur_x + g->cur_y;
+
7048 p = &g->out[idx];
+
7049 g->history[idx / 4] = 1;
+
7050
+
7051 c = &g->color_table[g->codes[code].suffix * 4];
+
7052 if (c[3] > 128) { // don't render transparent pixels;
+
7053 p[0] = c[2];
+
7054 p[1] = c[1];
+
7055 p[2] = c[0];
+
7056 p[3] = c[3];
+
7057 }
+
7058 g->cur_x += 4;
+
7059
+
7060 if (g->cur_x >= g->max_x) {
+
7061 g->cur_x = g->start_x;
+
7062 g->cur_y += g->step;
+
7063
+
7064 while (g->cur_y >= g->max_y && g->parse > 0) {
+
7065 g->step = (1 << g->parse) * g->line_size;
+
7066 g->cur_y = g->start_y + (g->step >> 1);
+
7067 --g->parse;
+
7068 }
+
7069 }
+
7070}
+
7071
+ + + + + +
7077 stbi__gif_lzw * p;
+
7078
+
7079 lzw_cs = stbi__get8(s);
+
7080 if (lzw_cs > 12)
+
7081 return NULL;
+
7082 clear = 1 << lzw_cs;
+
7083 first = 1;
+
7084 codesize = lzw_cs + 1;
+
7085 codemask = (1 << codesize) - 1;
+
7086 bits = 0;
+
7087 valid_bits = 0;
+
7088 for (init_code = 0; init_code < clear; init_code++) {
+
7089 g->codes[init_code].prefix = -1;
+
7090 g->codes[init_code].first = (stbi_uc)init_code;
+
7091 g->codes[init_code].suffix = (stbi_uc)init_code;
+
7092 }
+
7093
+
7094 // support no starting clear code
+
7095 avail = clear + 2;
+
7096 oldcode = -1;
+
7097
+
7098 len = 0;
+
7099 for (;;) {
+
7100 if (valid_bits < codesize) {
+
7101 if (len == 0) {
+
7102 len = stbi__get8(s); // start new block
+
7103 if (len == 0)
+
7104 return g->out;
+
7105 }
+
7106 --len;
+ +
7108 valid_bits += 8;
+
7109 } else {
+ +
7111 bits >>= codesize;
+ +
7113 // @OPTIMIZE: is there some way we can accelerate the non-clear path?
+
7114 if (code == clear) { // clear code
+
7115 codesize = lzw_cs + 1;
+
7116 codemask = (1 << codesize) - 1;
+
7117 avail = clear + 2;
+
7118 oldcode = -1;
+
7119 first = 0;
+
7120 } else if (code == clear + 1) { // end of stream code
+
7121 stbi__skip(s, len);
+
7122 while ((len = stbi__get8(s)) > 0)
+
7123 stbi__skip(s, len);
+
7124 return g->out;
+
7125 } else if (code <= avail) {
+
7126 if (first) {
+
7127 return stbi__errpuc("no clear code", "Corrupt GIF");
+
7128 }
+
7129
+
7130 if (oldcode >= 0) {
+
7131 p = &g->codes[avail++];
+
7132 if (avail > 8192) {
+
7133 return stbi__errpuc("too many codes", "Corrupt GIF");
+
7134 }
+
7135
+
7136 p->prefix = (stbi__int16)oldcode;
+
7137 p->first = g->codes[oldcode].first;
+
7138 p->suffix = (code == avail) ? p->first : g->codes[code].first;
+
7139 } else if (code == avail)
+
7140 return stbi__errpuc("illegal code in raster", "Corrupt GIF");
+
7141
+ +
7143
+
7144 if ((avail & codemask) == 0 && avail <= 0x0FFF) {
+
7145 codesize++;
+
7146 codemask = (1 << codesize) - 1;
+
7147 }
+
7148
+
7149 oldcode = code;
+
7150 } else {
+
7151 return stbi__errpuc("illegal code in raster", "Corrupt GIF");
+
7152 }
+
7153 }
+
7154 }
+
7155}
+
7156
+
7157// this function is designed to support animated gifs, although stb_image doesn't support it
+
7158// two back is the image from two frames ago, used for a very specific disposal format
+ +
7160 int dispose;
+
7161 int first_frame;
+
7162 int pi;
+
7163 int pcount;
+ +
7165
+
7166 // on first frame, any non-written pixels get the background colour (non-transparent)
+
7167 first_frame = 0;
+
7168 if (g->out == 0) {
+
7169 if (!stbi__gif_header(s, g, comp, 0))
+
7170 return 0; // stbi__g_failure_reason set by stbi__gif_header
+
7171 if (!stbi__mad3sizes_valid(4, g->w, g->h, 0))
+
7172 return stbi__errpuc("too large", "GIF image is too large");
+
7173 pcount = g->w * g->h;
+
7174 g->out = (stbi_uc *)stbi__malloc(4 * pcount);
+
7175 g->background = (stbi_uc *)stbi__malloc(4 * pcount);
+
7176 g->history = (stbi_uc *)stbi__malloc(pcount);
+
7177 if (!g->out || !g->background || !g->history)
+
7178 return stbi__errpuc("outofmem", "Out of memory");
+
7179
+
7180 // image is treated as "transparent" at the start - ie, nothing overwrites the current background;
+
7181 // background colour is only used for pixels that are not rendered first frame, after that "background"
+
7182 // color refers to the color that was there the previous frame.
+
7183 memset(g->out, 0x00, 4 * pcount);
+
7184 memset(g->background, 0x00, 4 * pcount); // state of the background (starts transparent)
+
7185 memset(g->history, 0x00, pcount); // pixels that were affected previous frame
+
7186 first_frame = 1;
+
7187 } else {
+
7188 // second frame - how do we dispose of the previous one?
+
7189 dispose = (g->eflags & 0x1C) >> 2;
+
7190 pcount = g->w * g->h;
+
7191
+
7192 if ((dispose == 3) && (two_back == 0)) {
+
7193 dispose = 2; // if I don't have an image to revert back to, default to the old background
+
7194 }
+
7195
+
7196 if (dispose == 3) { // use previous graphic
+
7197 for (pi = 0; pi < pcount; ++pi) {
+
7198 if (g->history[pi]) {
+
7199 memcpy(&g->out[pi * 4], &two_back[pi * 4], 4);
+
7200 }
+
7201 }
+
7202 } else if (dispose == 2) {
+
7203 // restore what was changed last frame to background before that frame;
+
7204 for (pi = 0; pi < pcount; ++pi) {
+
7205 if (g->history[pi]) {
+
7206 memcpy(&g->out[pi * 4], &g->background[pi * 4], 4);
+
7207 }
+
7208 }
+
7209 } else {
+
7210 // This is a non-disposal case eithe way, so just
+
7211 // leave the pixels as is, and they will become the new background
+
7212 // 1: do not dispose
+
7213 // 0: not specified.
+
7214 }
+
7215
+
7216 // background is what out is after the undoing of the previou frame;
+
7217 memcpy(g->background, g->out, 4 * g->w * g->h);
+
7218 }
+
7219
+
7220 // clear my history;
+
7221 memset(g->history, 0x00, g->w * g->h); // pixels that were affected previous frame
+
7222
+
7223 for (;;) {
+
7224 int tag = stbi__get8(s);
+
7225 switch (tag) {
+
7226 case 0x2C: /* Image Descriptor */
+
7227 {
+
7228 stbi__int32 x, y, w, h;
+
7229 stbi_uc * o;
+
7230
+
7231 x = stbi__get16le(s);
+
7232 y = stbi__get16le(s);
+
7233 w = stbi__get16le(s);
+
7234 h = stbi__get16le(s);
+
7235 if (((x + w) > (g->w)) || ((y + h) > (g->h)))
+
7236 return stbi__errpuc("bad Image Descriptor", "Corrupt GIF");
+
7237
+
7238 g->line_size = g->w * 4;
+
7239 g->start_x = x * 4;
+
7240 g->start_y = y * g->line_size;
+
7241 g->max_x = g->start_x + w * 4;
+
7242 g->max_y = g->start_y + h * g->line_size;
+
7243 g->cur_x = g->start_x;
+
7244 g->cur_y = g->start_y;
+
7245
+
7246 // if the width of the specified rectangle is 0, that means
+
7247 // we may not see *any* pixels or the image is malformed;
+
7248 // to make sure this is caught, move the current y down to
+
7249 // max_y (which is what out_gif_code checks).
+
7250 if (w == 0)
+
7251 g->cur_y = g->max_y;
+
7252
+
7253 g->lflags = stbi__get8(s);
+
7254
+
7255 if (g->lflags & 0x40) {
+
7256 g->step = 8 * g->line_size; // first interlaced spacing
+
7257 g->parse = 3;
+
7258 } else {
+
7259 g->step = g->line_size;
+
7260 g->parse = 0;
+
7261 }
+
7262
+
7263 if (g->lflags & 0x80) {
+
7264 stbi__gif_parse_colortable(s, g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1);
+
7265 g->color_table = (stbi_uc *)g->lpal;
+
7266 } else if (g->flags & 0x80) {
+
7267 g->color_table = (stbi_uc *)g->pal;
+
7268 } else
+
7269 return stbi__errpuc("missing color table", "Corrupt GIF");
+
7270
+ +
7272 if (!o)
+
7273 return NULL;
+
7274
+
7275 // if this was the first frame,
+
7276 pcount = g->w * g->h;
+
7277 if (first_frame && (g->bgindex > 0)) {
+
7278 // if first frame, any pixel not drawn to gets the background color
+
7279 for (pi = 0; pi < pcount; ++pi) {
+
7280 if (g->history[pi] == 0) {
+
7281 g->pal[g->bgindex][3] =
+
7282 255; // just in case it was made transparent, undo that; It will be reset next frame if need be;
+
7283 memcpy(&g->out[pi * 4], &g->pal[g->bgindex], 4);
+
7284 }
+
7285 }
+
7286 }
+
7287
+
7288 return o;
+
7289 }
+
7290
+
7291 case 0x21: // Comment Extension.
+
7292 {
+
7293 int len;
+
7294 int ext = stbi__get8(s);
+
7295 if (ext == 0xF9) { // Graphic Control Extension.
+
7296 len = stbi__get8(s);
+
7297 if (len == 4) {
+
7298 g->eflags = stbi__get8(s);
+
7299 g->delay = 10 * stbi__get16le(s); // delay - 1/100th of a second, saving as 1/1000ths.
+
7300
+
7301 // unset old transparent
+
7302 if (g->transparent >= 0) {
+
7303 g->pal[g->transparent][3] = 255;
+
7304 }
+
7305 if (g->eflags & 0x01) {
+
7306 g->transparent = stbi__get8(s);
+
7307 if (g->transparent >= 0) {
+
7308 g->pal[g->transparent][3] = 0;
+
7309 }
+
7310 } else {
+
7311 // don't need transparent
+
7312 stbi__skip(s, 1);
+
7313 g->transparent = -1;
+
7314 }
+
7315 } else {
+
7316 stbi__skip(s, len);
+
7317 break;
+
7318 }
+
7319 }
+
7320 while ((len = stbi__get8(s)) != 0) {
+
7321 stbi__skip(s, len);
+
7322 }
+
7323 break;
+
7324 }
+
7325
+
7326 case 0x3B: // gif stream termination code
+
7327 return (stbi_uc *)s; // using '1' causes warning on some compilers
+
7328
+
7329 default:
+
7330 return stbi__errpuc("unknown code", "Corrupt GIF");
+
7331 }
+
7332 }
+
7333}
+
7334
+
7335static void * stbi__load_gif_main_outofmem(stbi__gif * g, stbi_uc * out, int ** delays) {
+
7336 STBI_FREE(g->out);
+
7337 STBI_FREE(g->history);
+
7338 STBI_FREE(g->background);
+
7339
+
7340 if (out)
+
7341 STBI_FREE(out);
+
7342 if (delays && *delays)
+
7343 STBI_FREE(*delays);
+
7344 return stbi__errpuc("outofmem", "Out of memory");
+
7345}
+
7346
+
7347static void * stbi__load_gif_main(stbi__context * s, int ** delays, int * x, int * y, int * z, int * comp, int req_comp) {
+
7348 if (stbi__gif_test(s)) {
+
7349 int layers = 0;
+
7350 stbi_uc * u = 0;
+
7351 stbi_uc * out = 0;
+
7352 stbi_uc * two_back = 0;
+
7353 stbi__gif g;
+
7354 int stride;
+
7355 int out_size = 0;
+
7356 int delays_size = 0;
+
7357
+ + +
7360
+
7361 memset(&g, 0, sizeof(g));
+
7362 if (delays) {
+
7363 *delays = 0;
+
7364 }
+
7365
+
7366 do {
+ +
7368 if (u == (stbi_uc *)s)
+
7369 u = 0; // end of animated gif marker
+
7370
+
7371 if (u) {
+
7372 *x = g.w;
+
7373 *y = g.h;
+
7374 ++layers;
+
7375 stride = g.w * g.h * 4;
+
7376
+
7377 if (out) {
+
7378 void * tmp = (stbi_uc *)STBI_REALLOC_SIZED(out, out_size, layers * stride);
+
7379 if (!tmp)
+ +
7381 else {
+
7382 out = (stbi_uc *)tmp;
+
7383 out_size = layers * stride;
+
7384 }
+
7385
+
7386 if (delays) {
+
7387 int * new_delays = (int *)STBI_REALLOC_SIZED(*delays, delays_size, sizeof(int) * layers);
+
7388 if (!new_delays)
+ +
7390 *delays = new_delays;
+
7391 delays_size = layers * sizeof(int);
+
7392 }
+
7393 } else {
+
7394 out = (stbi_uc *)stbi__malloc(layers * stride);
+
7395 if (!out)
+ +
7397 out_size = layers * stride;
+
7398 if (delays) {
+
7399 *delays = (int *)stbi__malloc(layers * sizeof(int));
+
7400 if (!*delays)
+ +
7402 delays_size = layers * sizeof(int);
+
7403 }
+
7404 }
+
7405 memcpy(out + ((layers - 1) * stride), u, stride);
+
7406 if (layers >= 2) {
+
7407 two_back = out - 2 * stride;
+
7408 }
+
7409
+
7410 if (delays) {
+
7411 (*delays)[layers - 1U] = g.delay;
+
7412 }
+
7413 }
+
7414 } while (u != 0);
+
7415
+
7416 // free temp buffer;
+
7417 STBI_FREE(g.out);
+
7418 STBI_FREE(g.history);
+
7419 STBI_FREE(g.background);
+
7420
+
7421 // do the final conversion after loading everything;
+
7422 if (req_comp && req_comp != 4)
+
7423 out = stbi__convert_format(out, 4, req_comp, layers * g.w, g.h);
+
7424
+
7425 *z = layers;
+
7426 return out;
+
7427 } else {
+
7428 return stbi__errpuc("not GIF", "Image was not as a gif type.");
+
7429 }
+
7430}
+
7431
+
7432static void * stbi__gif_load(stbi__context * s, int * x, int * y, int * comp, int req_comp, stbi__result_info * ri) {
+
7433 stbi_uc * u = 0;
+
7434 stbi__gif g;
+
7435 memset(&g, 0, sizeof(g));
+ +
7437
+ +
7439 if (u == (stbi_uc *)s)
+
7440 u = 0; // end of animated gif marker
+
7441 if (u) {
+
7442 *x = g.w;
+
7443 *y = g.h;
+
7444
+
7445 // moved conversion to after successful load so that the same
+
7446 // can be done for multiple frames.
+
7447 if (req_comp && req_comp != 4)
+
7448 u = stbi__convert_format(u, 4, req_comp, g.w, g.h);
+
7449 } else if (g.out) {
+
7450 // if there was an error and we allocated an image buffer, free it!
+
7451 STBI_FREE(g.out);
+
7452 }
+
7453
+
7454 // free buffers needed for multiple frame loading;
+
7455 STBI_FREE(g.history);
+
7456 STBI_FREE(g.background);
+
7457
+
7458 return u;
+
7459}
+
7460
+
7461static int stbi__gif_info(stbi__context * s, int * x, int * y, int * comp) { return stbi__gif_info_raw(s, x, y, comp); }
+
7462#endif
+
7463
+
7464// *************************************************************************************************
+
7465// Radiance RGBE HDR loader
+
7466// originally by Nicolas Schulz
+
7467#ifndef STBI_NO_HDR
+
7468static int stbi__hdr_test_core(stbi__context * s, const char * signature) {
+
7469 int i;
+
7470 for (i = 0; signature[i]; ++i)
+
7471 if (stbi__get8(s) != signature[i])
+
7472 return 0;
+
7473 stbi__rewind(s);
+
7474 return 1;
+
7475}
+
7476
+
7477static int stbi__hdr_test(stbi__context * s) {
+
7478 int r = stbi__hdr_test_core(s, "#?RADIANCE\n");
+
7479 stbi__rewind(s);
+
7480 if (!r) {
+
7481 r = stbi__hdr_test_core(s, "#?RGBE\n");
+
7482 stbi__rewind(s);
+
7483 }
+
7484 return r;
+
7485}
+
7486
+
7487#define STBI__HDR_BUFLEN 1024
+
7488static char * stbi__hdr_gettoken(stbi__context * z, char * buffer) {
+
7489 int len = 0;
+
7490 char c = '\0';
+
7491
+
7492 c = (char)stbi__get8(z);
+
7493
+
7494 while (!stbi__at_eof(z) && c != '\n') {
+
7495 buffer[len++] = c;
+
7496 if (len == STBI__HDR_BUFLEN - 1) {
+
7497 // flush to end of line
+
7498 while (!stbi__at_eof(z) && stbi__get8(z) != '\n')
+
7499 ;
+
7500 break;
+
7501 }
+
7502 c = (char)stbi__get8(z);
+
7503 }
+
7504
+
7505 buffer[len] = 0;
+
7506 return buffer;
+
7507}
+
7508
+
7509static void stbi__hdr_convert(float * output, stbi_uc * input, int req_comp) {
+
7510 if (input[3] != 0) {
+
7511 float f1;
+
7512 // Exponent
+
7513 f1 = (float)ldexp(1.0f, input[3] - (int)(128 + 8));
+
7514 if (req_comp <= 2)
+
7515 output[0] = (input[0] + input[1] + input[2]) * f1 / 3;
+
7516 else {
+
7517 output[0] = input[0] * f1;
+
7518 output[1] = input[1] * f1;
+
7519 output[2] = input[2] * f1;
+
7520 }
+
7521 if (req_comp == 2)
+
7522 output[1] = 1;
+
7523 if (req_comp == 4)
+
7524 output[3] = 1;
+
7525 } else {
+
7526 switch (req_comp) {
+
7527 case 4:
+
7528 output[3] = 1; /* fallthrough */
+
7529 case 3:
+
7530 output[0] = output[1] = output[2] = 0;
+
7531 break;
+
7532 case 2:
+
7533 output[1] = 1; /* fallthrough */
+
7534 case 1:
+
7535 output[0] = 0;
+
7536 break;
+
7537 }
+
7538 }
+
7539}
+
7540
+
7541static float * stbi__hdr_load(stbi__context * s, int * x, int * y, int * comp, int req_comp, stbi__result_info * ri) {
+ +
7543 char * token;
+
7544 int valid = 0;
+
7545 int width, height;
+
7546 stbi_uc * scanline;
+
7547 float * hdr_data;
+
7548 int len;
+
7549 unsigned char count, value;
+
7550 int i, j, k, c1, c2, z;
+
7551 const char * headerToken;
+ +
7553
+
7554 // Check identifier
+ +
7556 if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0)
+
7557 return stbi__errpf("not HDR", "Corrupt HDR image");
+
7558
+
7559 // Parse header
+
7560 for (;;) {
+ +
7562 if (token[0] == 0)
+
7563 break;
+
7564 if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0)
+
7565 valid = 1;
+
7566 }
+
7567
+
7568 if (!valid)
+
7569 return stbi__errpf("unsupported format", "Unsupported HDR format");
+
7570
+
7571 // Parse width and height
+
7572 // can't use sscanf() if we're not using stdio!
+ +
7574 if (strncmp(token, "-Y ", 3))
+
7575 return stbi__errpf("unsupported data layout", "Unsupported HDR format");
+
7576 token += 3;
+
7577 height = (int)strtol(token, &token, 10);
+
7578 while (*token == ' ')
+
7579 ++token;
+
7580 if (strncmp(token, "+X ", 3))
+
7581 return stbi__errpf("unsupported data layout", "Unsupported HDR format");
+
7582 token += 3;
+
7583 width = (int)strtol(token, NULL, 10);
+
7584
+ +
7586 return stbi__errpf("too large", "Very large image (corrupt?)");
+ +
7588 return stbi__errpf("too large", "Very large image (corrupt?)");
+
7589
+
7590 *x = width;
+
7591 *y = height;
+
7592
+
7593 if (comp)
+
7594 *comp = 3;
+
7595 if (req_comp == 0)
+
7596 req_comp = 3;
+
7597
+
7598 if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0))
+
7599 return stbi__errpf("too large", "HDR image is too large");
+
7600
+
7601 // Read data
+
7602 hdr_data = (float *)stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0);
+
7603 if (!hdr_data)
+
7604 return stbi__errpf("outofmem", "Out of memory");
+
7605
+
7606 // Load image data
+
7607 // image data is stored as some number of sca
+
7608 if (width < 8 || width >= 32768) {
+
7609 // Read flat data
+
7610 for (j = 0; j < height; ++j) {
+
7611 for (i = 0; i < width; ++i) {
+
7612 stbi_uc rgbe[4];
+ +
7614 stbi__getn(s, rgbe, 4);
+ +
7616 }
+
7617 }
+
7618 } else {
+
7619 // Read RLE-encoded data
+
7620 scanline = NULL;
+
7621
+
7622 for (j = 0; j < height; ++j) {
+
7623 c1 = stbi__get8(s);
+
7624 c2 = stbi__get8(s);
+
7625 len = stbi__get8(s);
+
7626 if (c1 != 2 || c2 != 2 || (len & 0x80)) {
+
7627 // not run-length encoded, so we have to actually use THIS data as a decoded
+
7628 // pixel (note this can't be a valid pixel--one of RGB must be >= 128)
+
7629 stbi_uc rgbe[4];
+
7630 rgbe[0] = (stbi_uc)c1;
+
7631 rgbe[1] = (stbi_uc)c2;
+
7632 rgbe[2] = (stbi_uc)len;
+
7633 rgbe[3] = (stbi_uc)stbi__get8(s);
+ +
7635 i = 1;
+
7636 j = 0;
+ +
7638 goto main_decode_loop; // yes, this makes no sense
+
7639 }
+
7640 len <<= 8;
+
7641 len |= stbi__get8(s);
+
7642 if (len != width) {
+ + +
7645 return stbi__errpf("invalid decoded scanline length", "corrupt HDR");
+
7646 }
+
7647 if (scanline == NULL) {
+ +
7649 if (!scanline) {
+ +
7651 return stbi__errpf("outofmem", "Out of memory");
+
7652 }
+
7653 }
+
7654
+
7655 for (k = 0; k < 4; ++k) {
+
7656 int nleft;
+
7657 i = 0;
+
7658 while ((nleft = width - i) > 0) {
+
7659 count = stbi__get8(s);
+
7660 if (count > 128) {
+
7661 // Run
+
7662 value = stbi__get8(s);
+
7663 count -= 128;
+
7664 if ((count == 0) || (count > nleft)) {
+ + +
7667 return stbi__errpf("corrupt", "bad RLE data in HDR");
+
7668 }
+
7669 for (z = 0; z < count; ++z)
+
7670 scanline[i++ * 4 + k] = value;
+
7671 } else {
+
7672 // Dump
+
7673 if ((count == 0) || (count > nleft)) {
+ + +
7676 return stbi__errpf("corrupt", "bad RLE data in HDR");
+
7677 }
+
7678 for (z = 0; z < count; ++z)
+
7679 scanline[i++ * 4 + k] = stbi__get8(s);
+
7680 }
+
7681 }
+
7682 }
+
7683 for (i = 0; i < width; ++i)
+ +
7685 }
+
7686 if (scanline)
+ +
7688 }
+
7689
+
7690 return hdr_data;
+
7691}
+
7692
+
7693static int stbi__hdr_info(stbi__context * s, int * x, int * y, int * comp) {
+ +
7695 char * token;
+
7696 int valid = 0;
+
7697 int dummy;
+
7698
+
7699 if (!x)
+
7700 x = &dummy;
+
7701 if (!y)
+
7702 y = &dummy;
+
7703 if (!comp)
+
7704 comp = &dummy;
+
7705
+
7706 if (stbi__hdr_test(s) == 0) {
+
7707 stbi__rewind(s);
+
7708 return 0;
+
7709 }
+
7710
+
7711 for (;;) {
+ +
7713 if (token[0] == 0)
+
7714 break;
+
7715 if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0)
+
7716 valid = 1;
+
7717 }
+
7718
+
7719 if (!valid) {
+
7720 stbi__rewind(s);
+
7721 return 0;
+
7722 }
+ +
7724 if (strncmp(token, "-Y ", 3)) {
+
7725 stbi__rewind(s);
+
7726 return 0;
+
7727 }
+
7728 token += 3;
+
7729 *y = (int)strtol(token, &token, 10);
+
7730 while (*token == ' ')
+
7731 ++token;
+
7732 if (strncmp(token, "+X ", 3)) {
+
7733 stbi__rewind(s);
+
7734 return 0;
+
7735 }
+
7736 token += 3;
+
7737 *x = (int)strtol(token, NULL, 10);
+
7738 *comp = 3;
+
7739 return 1;
+
7740}
+
7741#endif // STBI_NO_HDR
+
7742
+
7743#ifndef STBI_NO_BMP
+
7744static int stbi__bmp_info(stbi__context * s, int * x, int * y, int * comp) {
+
7745 void * p;
+ +
7747
+
7748 info.all_a = 255;
+ +
7750 if (p == NULL) {
+
7751 stbi__rewind(s);
+
7752 return 0;
+
7753 }
+
7754 if (x)
+
7755 *x = s->img_x;
+
7756 if (y)
+
7757 *y = s->img_y;
+
7758 if (comp) {
+
7759 if (info.bpp == 24 && info.ma == 0xff000000)
+
7760 *comp = 3;
+
7761 else
+
7762 *comp = info.ma ? 4 : 3;
+
7763 }
+
7764 return 1;
+
7765}
+
7766#endif
+
7767
+
7768#ifndef STBI_NO_PSD
+
7769static int stbi__psd_info(stbi__context * s, int * x, int * y, int * comp) {
+
7770 int channelCount, dummy, depth;
+
7771 if (!x)
+
7772 x = &dummy;
+
7773 if (!y)
+
7774 y = &dummy;
+
7775 if (!comp)
+
7776 comp = &dummy;
+
7777 if (stbi__get32be(s) != 0x38425053) {
+
7778 stbi__rewind(s);
+
7779 return 0;
+
7780 }
+
7781 if (stbi__get16be(s) != 1) {
+
7782 stbi__rewind(s);
+
7783 return 0;
+
7784 }
+
7785 stbi__skip(s, 6);
+ + +
7788 stbi__rewind(s);
+
7789 return 0;
+
7790 }
+
7791 *y = stbi__get32be(s);
+
7792 *x = stbi__get32be(s);
+ +
7794 if (depth != 8 && depth != 16) {
+
7795 stbi__rewind(s);
+
7796 return 0;
+
7797 }
+
7798 if (stbi__get16be(s) != 3) {
+
7799 stbi__rewind(s);
+
7800 return 0;
+
7801 }
+
7802 *comp = 4;
+
7803 return 1;
+
7804}
+
7805
+
7806static int stbi__psd_is16(stbi__context * s) {
+
7807 int channelCount, depth;
+
7808 if (stbi__get32be(s) != 0x38425053) {
+
7809 stbi__rewind(s);
+
7810 return 0;
+
7811 }
+
7812 if (stbi__get16be(s) != 1) {
+
7813 stbi__rewind(s);
+
7814 return 0;
+
7815 }
+
7816 stbi__skip(s, 6);
+ + +
7819 stbi__rewind(s);
+
7820 return 0;
+
7821 }
+ + + +
7825 if (depth != 16) {
+
7826 stbi__rewind(s);
+
7827 return 0;
+
7828 }
+
7829 return 1;
+
7830}
+
7831#endif
+
7832
+
7833#ifndef STBI_NO_PIC
+
7834static int stbi__pic_info(stbi__context * s, int * x, int * y, int * comp) {
+
7835 int act_comp = 0, num_packets = 0, chained, dummy;
+ +
7837
+
7838 if (!x)
+
7839 x = &dummy;
+
7840 if (!y)
+
7841 y = &dummy;
+
7842 if (!comp)
+
7843 comp = &dummy;
+
7844
+
7845 if (!stbi__pic_is4(s, "\x53\x80\xF6\x34")) {
+
7846 stbi__rewind(s);
+
7847 return 0;
+
7848 }
+
7849
+
7850 stbi__skip(s, 88);
+
7851
+
7852 *x = stbi__get16be(s);
+
7853 *y = stbi__get16be(s);
+
7854 if (stbi__at_eof(s)) {
+
7855 stbi__rewind(s);
+
7856 return 0;
+
7857 }
+
7858 if ((*x) != 0 && (1 << 28) / (*x) < (*y)) {
+
7859 stbi__rewind(s);
+
7860 return 0;
+
7861 }
+
7862
+
7863 stbi__skip(s, 8);
+
7864
+
7865 do {
+ +
7867
+
7868 if (num_packets == sizeof(packets) / sizeof(packets[0]))
+
7869 return 0;
+
7870
+ +
7872 chained = stbi__get8(s);
+
7873 packet->size = stbi__get8(s);
+
7874 packet->type = stbi__get8(s);
+
7875 packet->channel = stbi__get8(s);
+
7876 act_comp |= packet->channel;
+
7877
+
7878 if (stbi__at_eof(s)) {
+
7879 stbi__rewind(s);
+
7880 return 0;
+
7881 }
+
7882 if (packet->size != 8) {
+
7883 stbi__rewind(s);
+
7884 return 0;
+
7885 }
+
7886 } while (chained);
+
7887
+
7888 *comp = (act_comp & 0x10 ? 4 : 3);
+
7889
+
7890 return 1;
+
7891}
+
7892#endif
+
7893
+
7894// *************************************************************************************************
+
7895// Portable Gray Map and Portable Pixel Map loader
+
7896// by Ken Miller
+
7897//
+
7898// PGM: http://netpbm.sourceforge.net/doc/pgm.html
+
7899// PPM: http://netpbm.sourceforge.net/doc/ppm.html
+
7900//
+
7901// Known limitations:
+
7902// Does not support comments in the header section
+
7903// Does not support ASCII image data (formats P2 and P3)
+
7904
+
7905#ifndef STBI_NO_PNM
+
7906
+
7907static int stbi__pnm_test(stbi__context * s) {
+
7908 char p, t;
+
7909 p = (char)stbi__get8(s);
+
7910 t = (char)stbi__get8(s);
+
7911 if (p != 'P' || (t != '5' && t != '6')) {
+
7912 stbi__rewind(s);
+
7913 return 0;
+
7914 }
+
7915 return 1;
+
7916}
+
7917
+
7918static void * stbi__pnm_load(stbi__context * s, int * x, int * y, int * comp, int req_comp, stbi__result_info * ri) {
+
7919 stbi_uc * out;
+ +
7921
+
7922 ri->bits_per_channel = stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n);
+
7923 if (ri->bits_per_channel == 0)
+
7924 return 0;
+
7925
+
7926 if (s->img_y > STBI_MAX_DIMENSIONS)
+
7927 return stbi__errpuc("too large", "Very large image (corrupt?)");
+
7928 if (s->img_x > STBI_MAX_DIMENSIONS)
+
7929 return stbi__errpuc("too large", "Very large image (corrupt?)");
+
7930
+
7931 *x = s->img_x;
+
7932 *y = s->img_y;
+
7933 if (comp)
+
7934 *comp = s->img_n;
+
7935
+
7936 if (!stbi__mad4sizes_valid(s->img_n, s->img_x, s->img_y, ri->bits_per_channel / 8, 0))
+
7937 return stbi__errpuc("too large", "PNM too large");
+
7938
+
7939 out = (stbi_uc *)stbi__malloc_mad4(s->img_n, s->img_x, s->img_y, ri->bits_per_channel / 8, 0);
+
7940 if (!out)
+
7941 return stbi__errpuc("outofmem", "Out of memory");
+
7942 if (!stbi__getn(s, out, s->img_n * s->img_x * s->img_y * (ri->bits_per_channel / 8))) {
+
7943 STBI_FREE(out);
+
7944 return stbi__errpuc("bad PNM", "PNM file truncated");
+
7945 }
+
7946
+
7947 if (req_comp && req_comp != s->img_n) {
+
7948 if (ri->bits_per_channel == 16) {
+
7949 out = (stbi_uc *)stbi__convert_format16((stbi__uint16 *)out, s->img_n, req_comp, s->img_x, s->img_y);
+
7950 } else {
+
7951 out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y);
+
7952 }
+
7953 if (out == NULL)
+
7954 return out; // stbi__convert_format frees input on failure
+
7955 }
+
7956 return out;
+
7957}
+
7958
+
7959static int stbi__pnm_isspace(char c) { return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r'; }
+
7960
+
7961static void stbi__pnm_skip_whitespace(stbi__context * s, char * c) {
+
7962 for (;;) {
+
7963 while (!stbi__at_eof(s) && stbi__pnm_isspace(*c))
+
7964 *c = (char)stbi__get8(s);
+
7965
+
7966 if (stbi__at_eof(s) || *c != '#')
+
7967 break;
+
7968
+
7969 while (!stbi__at_eof(s) && *c != '\n' && *c != '\r')
+
7970 *c = (char)stbi__get8(s);
+
7971 }
+
7972}
+
7973
+
7974static int stbi__pnm_isdigit(char c) { return c >= '0' && c <= '9'; }
+
7975
+
7976static int stbi__pnm_getinteger(stbi__context * s, char * c) {
+
7977 int value = 0;
+
7978
+
7979 while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) {
+
7980 value = value * 10 + (*c - '0');
+
7981 *c = (char)stbi__get8(s);
+
7982 if ((value > 214748364) || (value == 214748364 && *c > '7'))
+
7983 return stbi__err("integer parse overflow", "Parsing an integer in the PPM header overflowed a 32-bit int");
+
7984 }
+
7985
+
7986 return value;
+
7987}
+
7988
+
7989static int stbi__pnm_info(stbi__context * s, int * x, int * y, int * comp) {
+
7990 int maxv, dummy;
+
7991 char c, p, t;
+
7992
+
7993 if (!x)
+
7994 x = &dummy;
+
7995 if (!y)
+
7996 y = &dummy;
+
7997 if (!comp)
+
7998 comp = &dummy;
+
7999
+
8000 stbi__rewind(s);
+
8001
+
8002 // Get identifier
+
8003 p = (char)stbi__get8(s);
+
8004 t = (char)stbi__get8(s);
+
8005 if (p != 'P' || (t != '5' && t != '6')) {
+
8006 stbi__rewind(s);
+
8007 return 0;
+
8008 }
+
8009
+
8010 *comp = (t == '6') ? 3 : 1; // '5' is 1-component .pgm; '6' is 3-component .ppm
+
8011
+
8012 c = (char)stbi__get8(s);
+ +
8014
+
8015 *x = stbi__pnm_getinteger(s, &c); // read width
+
8016 if (*x == 0)
+
8017 return stbi__err("invalid width", "PPM image header had zero or overflowing width");
+ +
8019
+
8020 *y = stbi__pnm_getinteger(s, &c); // read height
+
8021 if (*y == 0)
+
8022 return stbi__err("invalid width", "PPM image header had zero or overflowing width");
+ +
8024
+
8025 maxv = stbi__pnm_getinteger(s, &c); // read max value
+
8026 if (maxv > 65535)
+
8027 return stbi__err("max value > 65535", "PPM image supports only 8-bit and 16-bit images");
+
8028 else if (maxv > 255)
+
8029 return 16;
+
8030 else
+
8031 return 8;
+
8032}
+
8033
+
8034static int stbi__pnm_is16(stbi__context * s) {
+
8035 if (stbi__pnm_info(s, NULL, NULL, NULL) == 16)
+
8036 return 1;
+
8037 return 0;
+
8038}
+
8039#endif
+
8040
+
8041static int stbi__info_main(stbi__context * s, int * x, int * y, int * comp) {
+
8042#ifndef STBI_NO_JPEG
+
8043 if (stbi__jpeg_info(s, x, y, comp))
+
8044 return 1;
+
8045#endif
+
8046
+
8047#ifndef STBI_NO_PNG
+
8048 if (stbi__png_info(s, x, y, comp))
+
8049 return 1;
+
8050#endif
+
8051
+
8052#ifndef STBI_NO_GIF
+
8053 if (stbi__gif_info(s, x, y, comp))
+
8054 return 1;
+
8055#endif
+
8056
+
8057#ifndef STBI_NO_BMP
+
8058 if (stbi__bmp_info(s, x, y, comp))
+
8059 return 1;
+
8060#endif
+
8061
+
8062#ifndef STBI_NO_PSD
+
8063 if (stbi__psd_info(s, x, y, comp))
+
8064 return 1;
+
8065#endif
+
8066
+
8067#ifndef STBI_NO_PIC
+
8068 if (stbi__pic_info(s, x, y, comp))
+
8069 return 1;
+
8070#endif
+
8071
+
8072#ifndef STBI_NO_PNM
+
8073 if (stbi__pnm_info(s, x, y, comp))
+
8074 return 1;
+
8075#endif
+
8076
+
8077#ifndef STBI_NO_HDR
+
8078 if (stbi__hdr_info(s, x, y, comp))
+
8079 return 1;
+
8080#endif
+
8081
+
8082// test tga last because it's a crappy test!
+
8083#ifndef STBI_NO_TGA
+
8084 if (stbi__tga_info(s, x, y, comp))
+
8085 return 1;
+
8086#endif
+
8087 return stbi__err("unknown image type", "Image not of any known type, or corrupt");
+
8088}
+
8089
+
8090static int stbi__is_16_main(stbi__context * s) {
+
8091#ifndef STBI_NO_PNG
+
8092 if (stbi__png_is16(s))
+
8093 return 1;
+
8094#endif
+
8095
+
8096#ifndef STBI_NO_PSD
+
8097 if (stbi__psd_is16(s))
+
8098 return 1;
+
8099#endif
+
8100
+
8101#ifndef STBI_NO_PNM
+
8102 if (stbi__pnm_is16(s))
+
8103 return 1;
+
8104#endif
+
8105 return 0;
+
8106}
+
8107
+
8108#ifndef STBI_NO_STDIO
+
8109STBIDEF int stbi_info(char const * filename, int * x, int * y, int * comp) {
+
8110 FILE * f = stbi__fopen(filename, "rb");
+
8111 int result;
+
8112 if (!f)
+
8113 return stbi__err("can't fopen", "Unable to open file");
+
8114 result = stbi_info_from_file(f, x, y, comp);
+
8115 fclose(f);
+
8116 return result;
+
8117}
+
8118
+
8119STBIDEF int stbi_info_from_file(FILE * f, int * x, int * y, int * comp) {
+
8120 int r;
+ +
8122 long pos = ftell(f);
+
8123 stbi__start_file(&s, f);
+
8124 r = stbi__info_main(&s, x, y, comp);
+
8125 fseek(f, pos, SEEK_SET);
+
8126 return r;
+
8127}
+
8128
+
8129STBIDEF int stbi_is_16_bit(char const * filename) {
+
8130 FILE * f = stbi__fopen(filename, "rb");
+
8131 int result;
+
8132 if (!f)
+
8133 return stbi__err("can't fopen", "Unable to open file");
+
8134 result = stbi_is_16_bit_from_file(f);
+
8135 fclose(f);
+
8136 return result;
+
8137}
+
8138
+
8139STBIDEF int stbi_is_16_bit_from_file(FILE * f) {
+
8140 int r;
+ +
8142 long pos = ftell(f);
+
8143 stbi__start_file(&s, f);
+
8144 r = stbi__is_16_main(&s);
+
8145 fseek(f, pos, SEEK_SET);
+
8146 return r;
+
8147}
+
8148#endif // !STBI_NO_STDIO
+
8149
+
8150STBIDEF int stbi_info_from_memory(stbi_uc const * buffer, int len, int * x, int * y, int * comp) {
+ + +
8153 return stbi__info_main(&s, x, y, comp);
+
8154}
+
8155
+
8156STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const * c, void * user, int * x, int * y, int * comp) {
+ + +
8159 return stbi__info_main(&s, x, y, comp);
+
8160}
+
8161
+
8162STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const * buffer, int len) {
+ + +
8165 return stbi__is_16_main(&s);
+
8166}
+
8167
+
8168STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const * c, void * user) {
+ + +
8171 return stbi__is_16_main(&s);
+
8172}
+
8173
+
8174#endif // STB_IMAGE_IMPLEMENTATION
+
8175
+
8176/*
+
8177 revision history:
+
8178 2.20 (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs
+
8179 2.19 (2018-02-11) fix warning
+
8180 2.18 (2018-01-30) fix warnings
+
8181 2.17 (2018-01-29) change sbti__shiftsigned to avoid clang -O2 bug
+
8182 1-bit BMP
+
8183 *_is_16_bit api
+
8184 avoid warnings
+
8185 2.16 (2017-07-23) all functions have 16-bit variants;
+
8186 STBI_NO_STDIO works again;
+
8187 compilation fixes;
+
8188 fix rounding in unpremultiply;
+
8189 optimize vertical flip;
+
8190 disable raw_len validation;
+
8191 documentation fixes
+
8192 2.15 (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode;
+
8193 warning fixes; disable run-time SSE detection on gcc;
+
8194 uniform handling of optional "return" values;
+
8195 thread-safe initialization of zlib tables
+
8196 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
+
8197 2.13 (2016-11-29) add 16-bit API, only supported for PNG right now
+
8198 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
+
8199 2.11 (2016-04-02) allocate large structures on the stack
+
8200 remove white matting for transparent PSD
+
8201 fix reported channel count for PNG & BMP
+
8202 re-enable SSE2 in non-gcc 64-bit
+
8203 support RGB-formatted JPEG
+
8204 read 16-bit PNGs (only as 8-bit)
+
8205 2.10 (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED
+
8206 2.09 (2016-01-16) allow comments in PNM files
+
8207 16-bit-per-pixel TGA (not bit-per-component)
+
8208 info() for TGA could break due to .hdr handling
+
8209 info() for BMP to shares code instead of sloppy parse
+
8210 can use STBI_REALLOC_SIZED if allocator doesn't support realloc
+
8211 code cleanup
+
8212 2.08 (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA
+
8213 2.07 (2015-09-13) fix compiler warnings
+
8214 partial animated GIF support
+
8215 limited 16-bpc PSD support
+
8216 #ifdef unused functions
+
8217 bug with < 92 byte PIC,PNM,HDR,TGA
+
8218 2.06 (2015-04-19) fix bug where PSD returns wrong '*comp' value
+
8219 2.05 (2015-04-19) fix bug in progressive JPEG handling, fix warning
+
8220 2.04 (2015-04-15) try to re-enable SIMD on MinGW 64-bit
+
8221 2.03 (2015-04-12) extra corruption checking (mmozeiko)
+
8222 stbi_set_flip_vertically_on_load (nguillemot)
+
8223 fix NEON support; fix mingw support
+
8224 2.02 (2015-01-19) fix incorrect assert, fix warning
+
8225 2.01 (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit without -msse2
+
8226 2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG
+
8227 2.00 (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg)
+
8228 progressive JPEG (stb)
+
8229 PGM/PPM support (Ken Miller)
+
8230 STBI_MALLOC,STBI_REALLOC,STBI_FREE
+
8231 GIF bugfix -- seemingly never worked
+
8232 STBI_NO_*, STBI_ONLY_*
+
8233 1.48 (2014-12-14) fix incorrectly-named assert()
+
8234 1.47 (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb)
+
8235 optimize PNG (ryg)
+
8236 fix bug in interlaced PNG with user-specified channel count (stb)
+
8237 1.46 (2014-08-26)
+
8238 fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG
+
8239 1.45 (2014-08-16)
+
8240 fix MSVC-ARM internal compiler error by wrapping malloc
+
8241 1.44 (2014-08-07)
+
8242 various warning fixes from Ronny Chevalier
+
8243 1.43 (2014-07-15)
+
8244 fix MSVC-only compiler problem in code changed in 1.42
+
8245 1.42 (2014-07-09)
+
8246 don't define _CRT_SECURE_NO_WARNINGS (affects user code)
+
8247 fixes to stbi__cleanup_jpeg path
+
8248 added STBI_ASSERT to avoid requiring assert.h
+
8249 1.41 (2014-06-25)
+
8250 fix search&replace from 1.36 that messed up comments/error messages
+
8251 1.40 (2014-06-22)
+
8252 fix gcc struct-initialization warning
+
8253 1.39 (2014-06-15)
+
8254 fix to TGA optimization when req_comp != number of components in TGA;
+
8255 fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite)
+
8256 add support for BMP version 5 (more ignored fields)
+
8257 1.38 (2014-06-06)
+
8258 suppress MSVC warnings on integer casts truncating values
+
8259 fix accidental rename of 'skip' field of I/O
+
8260 1.37 (2014-06-04)
+
8261 remove duplicate typedef
+
8262 1.36 (2014-06-03)
+
8263 convert to header file single-file library
+
8264 if de-iphone isn't set, load iphone images color-swapped instead of returning NULL
+
8265 1.35 (2014-05-27)
+
8266 various warnings
+
8267 fix broken STBI_SIMD path
+
8268 fix bug where stbi_load_from_file no longer left file pointer in correct place
+
8269 fix broken non-easy path for 32-bit BMP (possibly never used)
+
8270 TGA optimization by Arseny Kapoulkine
+
8271 1.34 (unknown)
+
8272 use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case
+
8273 1.33 (2011-07-14)
+
8274 make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements
+
8275 1.32 (2011-07-13)
+
8276 support for "info" function for all supported filetypes (SpartanJ)
+
8277 1.31 (2011-06-20)
+
8278 a few more leak fixes, bug in PNG handling (SpartanJ)
+
8279 1.30 (2011-06-11)
+
8280 added ability to load files via callbacks to accomidate custom input streams (Ben Wenger)
+
8281 removed deprecated format-specific test/load functions
+
8282 removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks
+
8283 anyway error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha) fix inefficiency in
+
8284 decoding 32-bit BMP (David Woo) 1.29 (2010-08-16) various warning fixes from Aurelien Pocheville 1.28 (2010-08-01)
+
8285 fix bug in GIF palette transparency (SpartanJ)
+
8286 1.27 (2010-08-01)
+
8287 cast-to-stbi_uc to fix warnings
+
8288 1.26 (2010-07-24)
+
8289 fix bug in file buffering for PNG reported by SpartanJ
+
8290 1.25 (2010-07-17)
+
8291 refix trans_data warning (Won Chun)
+
8292 1.24 (2010-07-12)
+
8293 perf improvements reading from files on platforms with lock-heavy fgetc()
+
8294 minor perf improvements for jpeg
+
8295 deprecated type-specific functions so we'll get feedback if they're needed
+
8296 attempt to fix trans_data warning (Won Chun)
+
8297 1.23 fixed bug in iPhone support
+
8298 1.22 (2010-07-10)
+
8299 removed image *writing* support
+
8300 stbi_info support from Jetro Lauha
+
8301 GIF support from Jean-Marc Lienher
+
8302 iPhone PNG-extensions from James Brown
+
8303 warning-fixes from Nicolas Schulz and Janez Zemva (i.stbi__err. Janez (U+017D)emva)
+
8304 1.21 fix use of 'stbi_uc' in header (reported by jon blow)
+
8305 1.20 added support for Softimage PIC, by Tom Seddon
+
8306 1.19 bug in interlaced PNG corruption check (found by ryg)
+
8307 1.18 (2008-08-02)
+
8308 fix a threading bug (local mutable static)
+
8309 1.17 support interlaced PNG
+
8310 1.16 major bugfix - stbi__convert_format converted one too many pixels
+
8311 1.15 initialize some fields for thread safety
+
8312 1.14 fix threadsafe conversion bug
+
8313 header-file-only version (#define STBI_HEADER_FILE_ONLY before including)
+
8314 1.13 threadsafe
+
8315 1.12 const qualifiers in the API
+
8316 1.11 Support installable IDCT, colorspace conversion routines
+
8317 1.10 Fixes for 64-bit (don't use "unsigned long")
+
8318 optimized upsampling by Fabian "ryg" Giesen
+
8319 1.09 Fix format-conversion for PSD code (bad global variables!)
+
8320 1.08 Thatcher Ulrich's PSD code integrated by Nicolas Schulz
+
8321 1.07 attempt to fix C++ warning/errors again
+
8322 1.06 attempt to fix C++ warning/errors again
+
8323 1.05 fix TGA loading to return correct *comp and use good luminance calc
+
8324 1.04 default float alpha is 1, not 255; use 'void *' for stbi_image_free
+
8325 1.03 bugfixes to STBI_NO_STDIO, STBI_NO_HDR
+
8326 1.02 support for (subset of) HDR files, float interface for preferred access to them
+
8327 1.01 fix bug: possible bug in handling right-side up bmps... not sure
+
8328 fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all
+
8329 1.00 interface to zlib that skips zlib header
+
8330 0.99 correct handling of alpha in palette
+
8331 0.98 TGA loader by lonesock; dynamically add loaders (untested)
+
8332 0.97 jpeg errors on too large a file; also catch another malloc failure
+
8333 0.96 fix detection of invalid v value - particleman@mollyrocket forum
+
8334 0.95 during header scan, seek to markers in case of padding
+
8335 0.94 STBI_NO_STDIO to disable stdio usage; rename all #defines the same
+
8336 0.93 handle jpegtran output; verbose errors
+
8337 0.92 read 4,8,16,24,32-bit BMP files of several formats
+
8338 0.91 output 24-bit Windows 3.0 BMP files
+
8339 0.90 fix a few more warnings; bump version number to approach 1.0
+
8340 0.61 bugfixes due to Marc LeBlanc, Christopher Lloyd
+
8341 0.60 fix compiling as c++
+
8342 0.59 fix warnings: merge Dave Moore's -Wall fixes
+
8343 0.58 fix bug: zlib uncompressed mode len/nlen was wrong endian
+
8344 0.57 fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available
+
8345 0.56 fix bug: zlib uncompressed mode len vs. nlen
+
8346 0.55 fix bug: restart_interval not initialized to 0
+
8347 0.54 allow NULL for 'int *comp'
+
8348 0.53 fix bug in png 3->4; speedup png decoding
+
8349 0.52 png handles req_comp=3,4 directly; minor cleanup; jpeg comments
+
8350 0.51 obey req_comp requests, 1-component jpegs return as 1-component,
+
8351 on 'test' only check type, not whether we support this variant
+
8352 0.50 (2006-11-19)
+
8353 first released version
+
8354*/
+
8355
+
8356/*
+
8357------------------------------------------------------------------------------
+
8358This software is available under 2 licenses -- choose whichever you prefer.
+
8359------------------------------------------------------------------------------
+
8360ALTERNATIVE A - MIT License
+
8361Copyright (c) 2017 Sean Barrett
+
8362Permission is hereby granted, free of charge, to any person obtaining a copy of
+
8363this software and associated documentation files (the "Software"), to deal in
+
8364the Software without restriction, including without limitation the rights to
+
8365use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+
8366of the Software, and to permit persons to whom the Software is furnished to do
+
8367so, subject to the following conditions:
+
8368The above copyright notice and this permission notice shall be included in all
+
8369copies or substantial portions of the Software.
+
8370THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+
8371IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+
8372FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+
8373AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+
8374LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+
8375OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+
8376SOFTWARE.
+
8377------------------------------------------------------------------------------
+
8378ALTERNATIVE B - Public Domain (www.unlicense.org)
+
8379This is free and unencumbered software released into the public domain.
+
8380Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+
8381software, either in source code form or as a compiled binary, for any purpose,
+
8382commercial or non-commercial, and by any means.
+
8383In jurisdictions that recognize copyright laws, the author or authors of this
+
8384software dedicate any and all copyright interest in the software to the public
+
8385domain. We make this dedication for the benefit of the public at large and to
+
8386the detriment of our heirs and successors. We intend this dedication to be an
+
8387overt act of relinquishment in perpetuity of all present and future rights to
+
8388this software under copyright law.
+
8389THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+
8390IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+
8391FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+
8392AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+
8393ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+
8394WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
8395------------------------------------------------------------------------------
+
8396*/
+
Definition common.h:34
+
Definition stb_image.h:408
+
+ + + + diff --git a/structConv2D__params-members.html b/structConv2D__params-members.html new file mode 100644 index 00000000..d1317fb4 --- /dev/null +++ b/structConv2D__params-members.html @@ -0,0 +1,96 @@ + + + + + + + +TinyChatEngine: Member List + + + + + + + + + + + +
+
+ + + + + + +
+
TinyChatEngine +
+
+
+ + + + + + + + +
+
+ + +
+
+
+
+
+
Loading...
+
Searching...
+
No Matches
+
+
+
+
+ +
+
+
Conv2D_params Member List
+
+
+ +

This is the complete list of members for Conv2D_params, including all inherited members.

+ + + + + + + + + + + +
bias (defined in Conv2D_params)Conv2D_params
dilation_height_factor (defined in Conv2D_params)Conv2D_params
dilation_width_factor (defined in Conv2D_params)Conv2D_params
float_activation_max (defined in Conv2D_params)Conv2D_params
float_activation_min (defined in Conv2D_params)Conv2D_params
padding_height (defined in Conv2D_params)Conv2D_params
padding_width (defined in Conv2D_params)Conv2D_params
stride_height (defined in Conv2D_params)Conv2D_params
stride_width (defined in Conv2D_params)Conv2D_params
weight (defined in Conv2D_params)Conv2D_params
+ + + + diff --git a/structConv2D__params.html b/structConv2D__params.html new file mode 100644 index 00000000..f41627c0 --- /dev/null +++ b/structConv2D__params.html @@ -0,0 +1,123 @@ + + + + + + + +TinyChatEngine: Conv2D_params Struct Reference + + + + + + + + + + + +
+
+ + + + + + +
+
TinyChatEngine +
+
+
+ + + + + + + + +
+
+ + +
+
+
+
+
+
Loading...
+
Searching...
+
No Matches
+
+
+
+
+ +
+
+ +
Conv2D_params Struct Reference
+
+
+ + + + + + + + + + + + + + + + + + + + + + +

+Public Attributes

+Matrix4D< floatweight
 
+Matrix3D< floatbias
 
+int stride_width = 1
 
+int stride_height = 1
 
+int dilation_width_factor = 1
 
+int dilation_height_factor = 1
 
+int padding_width = 0
 
+int padding_height = 0
 
+float float_activation_min = -std::numeric_limits<float>::max()
 
+float float_activation_max = std::numeric_limits<float>::max()
 
+
The documentation for this struct was generated from the following file: +
+ + + + diff --git a/structFp32CLIPAttention__input-members.html b/structFp32CLIPAttention__input-members.html new file mode 100644 index 00000000..cf6d076c --- /dev/null +++ b/structFp32CLIPAttention__input-members.html @@ -0,0 +1,94 @@ + + + + + + + +TinyChatEngine: Member List + + + + + + + + + + + +
+
+ + + + + + +
+
TinyChatEngine +
+
+
+ + + + + + + + +
+
+ + +
+
+
+
+
+
Loading...
+
Searching...
+
No Matches
+
+
+
+
+ +
+
+
Fp32CLIPAttention_input Member List
+
+
+ +

This is the complete list of members for Fp32CLIPAttention_input, including all inherited members.

+ + + + + + + + + +
attention_mask (defined in Fp32CLIPAttention_input)Fp32CLIPAttention_input
Fp32CLIPAttention_input(Matrix3D< float > hidden_states_, Matrix3D< float > attention_mask_, int layer_idx_) (defined in Fp32CLIPAttention_input)Fp32CLIPAttention_inputinline
Fp32CLIPAttention_input(Matrix3D< float > hidden_states_, Matrix3D< float > attention_mask_, Matrix3D< float > past_key_, Matrix3D< float > past_value_, bool has_past_key_value_, int layer_idx_) (defined in Fp32CLIPAttention_input)Fp32CLIPAttention_inputinline
has_past_key_value (defined in Fp32CLIPAttention_input)Fp32CLIPAttention_input
hidden_states (defined in Fp32CLIPAttention_input)Fp32CLIPAttention_input
layer_idx (defined in Fp32CLIPAttention_input)Fp32CLIPAttention_input
past_key (defined in Fp32CLIPAttention_input)Fp32CLIPAttention_input
past_value (defined in Fp32CLIPAttention_input)Fp32CLIPAttention_input
+ + + + diff --git a/structFp32CLIPAttention__input.html b/structFp32CLIPAttention__input.html new file mode 100644 index 00000000..98669943 --- /dev/null +++ b/structFp32CLIPAttention__input.html @@ -0,0 +1,121 @@ + + + + + + + +TinyChatEngine: Fp32CLIPAttention_input Struct Reference + + + + + + + + + + + +
+
+ + + + + + +
+
TinyChatEngine +
+
+
+ + + + + + + + +
+
+ + +
+
+
+
+
+
Loading...
+
Searching...
+
No Matches
+
+
+
+
+ +
+
+ +
Fp32CLIPAttention_input Struct Reference
+
+
+ + + + + + +

+Public Member Functions

Fp32CLIPAttention_input (Matrix3D< float > hidden_states_, Matrix3D< float > attention_mask_, int layer_idx_)
 
Fp32CLIPAttention_input (Matrix3D< float > hidden_states_, Matrix3D< float > attention_mask_, Matrix3D< float > past_key_, Matrix3D< float > past_value_, bool has_past_key_value_, int layer_idx_)
 
+ + + + + + + + + + + + + +

+Public Attributes

+Matrix3D< floathidden_states
 
+Matrix3D< floatattention_mask
 
+Matrix3D< floatpast_key
 
+Matrix3D< floatpast_value
 
+bool has_past_key_value = false
 
+int layer_idx
 
+
The documentation for this struct was generated from the following file: +
+ + + + diff --git a/structFp32CLIPAttention__output-members.html b/structFp32CLIPAttention__output-members.html new file mode 100644 index 00000000..a2ccbc0a --- /dev/null +++ b/structFp32CLIPAttention__output-members.html @@ -0,0 +1,89 @@ + + + + + + + +TinyChatEngine: Member List + + + + + + + + + + + +
+
+ + + + + + +
+
TinyChatEngine +
+
+
+ + + + + + + + +
+
+ + +
+
+
+
+
+
Loading...
+
Searching...
+
No Matches
+
+
+
+
+ +
+
+
Fp32CLIPAttention_output Member List
+
+
+ +

This is the complete list of members for Fp32CLIPAttention_output, including all inherited members.

+ + + + +
attn_output (defined in Fp32CLIPAttention_output)Fp32CLIPAttention_output
attn_probs_reshaped (defined in Fp32CLIPAttention_output)Fp32CLIPAttention_output
past_key_value (defined in Fp32CLIPAttention_output)Fp32CLIPAttention_output
+ + + + diff --git a/structFp32CLIPAttention__output.html b/structFp32CLIPAttention__output.html new file mode 100644 index 00000000..d4e7f48a --- /dev/null +++ b/structFp32CLIPAttention__output.html @@ -0,0 +1,102 @@ + + + + + + + +TinyChatEngine: Fp32CLIPAttention_output Struct Reference + + + + + + + + + + + +
+
+ + + + + + +
+
TinyChatEngine +
+
+
+ + + + + + + + +
+
+ + +
+
+
+
+
+
Loading...
+
Searching...
+
No Matches
+
+
+
+
+ +
+
+ +
Fp32CLIPAttention_output Struct Reference
+
+
+ + + + + + + + +

+Public Attributes

+Matrix3D< floatattn_output
 
+Matrix3D< floatattn_probs_reshaped
 
+std::pair< Matrix3D< float >, Matrix3D< float > > past_key_value
 
+
The documentation for this struct was generated from the following file: +
+ + + + diff --git a/structFp32CLIPEncoderLayer__input-members.html b/structFp32CLIPEncoderLayer__input-members.html new file mode 100644 index 00000000..3d4c7834 --- /dev/null +++ b/structFp32CLIPEncoderLayer__input-members.html @@ -0,0 +1,93 @@ + + + + + + + +TinyChatEngine: Member List + + + + + + + + + + + +
+
+ + + + + + +
+
TinyChatEngine +
+
+
+ + + + + + + + +
+
+ + +
+
+
+
+
+
Loading...
+
Searching...
+
No Matches
+
+
+
+
+ +
+
+
Fp32CLIPEncoderLayer_input Member List
+
+
+ +

This is the complete list of members for Fp32CLIPEncoderLayer_input, including all inherited members.

+ + + + + + + + +
attention_mask (defined in Fp32CLIPEncoderLayer_input)Fp32CLIPEncoderLayer_input
Fp32CLIPEncoderLayer_input(Matrix3D< float > &hidden_states_, Matrix3D< float > attention_mask_) (defined in Fp32CLIPEncoderLayer_input)Fp32CLIPEncoderLayer_inputinline
Fp32CLIPEncoderLayer_input(Matrix3D< float > &hidden_states_, Matrix3D< float > attention_mask_, Matrix3D< float > past_key_, Matrix3D< float > past_value_) (defined in Fp32CLIPEncoderLayer_input)Fp32CLIPEncoderLayer_inputinline
has_past_key_value (defined in Fp32CLIPEncoderLayer_input)Fp32CLIPEncoderLayer_input
hidden_states (defined in Fp32CLIPEncoderLayer_input)Fp32CLIPEncoderLayer_input
past_key (defined in Fp32CLIPEncoderLayer_input)Fp32CLIPEncoderLayer_input
past_value (defined in Fp32CLIPEncoderLayer_input)Fp32CLIPEncoderLayer_input
+ + + + diff --git a/structFp32CLIPEncoderLayer__input.html b/structFp32CLIPEncoderLayer__input.html new file mode 100644 index 00000000..22a0b156 --- /dev/null +++ b/structFp32CLIPEncoderLayer__input.html @@ -0,0 +1,118 @@ + + + + + + + +TinyChatEngine: Fp32CLIPEncoderLayer_input Struct Reference + + + + + + + + + + + +
+
+ + + + + + +
+
TinyChatEngine +
+
+
+ + + + + + + + +
+
+ + +
+
+
+
+
+
Loading...
+
Searching...
+
No Matches
+
+
+
+
+ +
+
+ +
Fp32CLIPEncoderLayer_input Struct Reference
+
+
+ + + + + + +

+Public Member Functions

Fp32CLIPEncoderLayer_input (Matrix3D< float > &hidden_states_, Matrix3D< float > attention_mask_)
 
Fp32CLIPEncoderLayer_input (Matrix3D< float > &hidden_states_, Matrix3D< float > attention_mask_, Matrix3D< float > past_key_, Matrix3D< float > past_value_)
 
+ + + + + + + + + + + +

+Public Attributes

+Matrix3D< floathidden_states
 
+Matrix3D< floatattention_mask
 
+Matrix3D< floatpast_key
 
+Matrix3D< floatpast_value
 
+bool has_past_key_value = false
 
+
The documentation for this struct was generated from the following file: +
+ + + + diff --git a/structFp32CLIPEncoderLayer__output-members.html b/structFp32CLIPEncoderLayer__output-members.html new file mode 100644 index 00000000..4e020175 --- /dev/null +++ b/structFp32CLIPEncoderLayer__output-members.html @@ -0,0 +1,90 @@ + + + + + + + +TinyChatEngine: Member List + + + + + + + + + + + +
+
+ + + + + + +
+
TinyChatEngine +
+
+
+ + + + + + + + +
+
+ + +
+
+
+
+
+
Loading...
+
Searching...
+
No Matches
+
+
+
+
+ +
+
+
Fp32CLIPEncoderLayer_output Member List
+
+
+ +

This is the complete list of members for Fp32CLIPEncoderLayer_output, including all inherited members.

+ + + + + +
attentions (defined in Fp32CLIPEncoderLayer_output)Fp32CLIPEncoderLayer_output
Fp32CLIPEncoderLayer_output(Matrix3D< float > hidden_states_, Matrix3D< float > attentions_, std::pair< Matrix3D< float >, Matrix3D< float > > past_key_value_) (defined in Fp32CLIPEncoderLayer_output)Fp32CLIPEncoderLayer_outputinline
hidden_states (defined in Fp32CLIPEncoderLayer_output)Fp32CLIPEncoderLayer_output
past_key_value (defined in Fp32CLIPEncoderLayer_output)Fp32CLIPEncoderLayer_output
+ + + + diff --git a/structFp32CLIPEncoderLayer__output.html b/structFp32CLIPEncoderLayer__output.html new file mode 100644 index 00000000..db974ea1 --- /dev/null +++ b/structFp32CLIPEncoderLayer__output.html @@ -0,0 +1,109 @@ + + + + + + + +TinyChatEngine: Fp32CLIPEncoderLayer_output Struct Reference + + + + + + + + + + + +
+
+ + + + + + +
+
TinyChatEngine +
+
+
+ + + + + + + + +
+
+ + +
+
+
+
+
+
Loading...
+
Searching...
+
No Matches
+
+
+
+
+ +
+
+ +
Fp32CLIPEncoderLayer_output Struct Reference
+
+
+ + + + +

+Public Member Functions

Fp32CLIPEncoderLayer_output (Matrix3D< float > hidden_states_, Matrix3D< float > attentions_, std::pair< Matrix3D< float >, Matrix3D< float > > past_key_value_)
 
+ + + + + + + +

+Public Attributes

+Matrix3D< floathidden_states
 
+Matrix3D< floatattentions
 
+std::pair< Matrix3D< float >, Matrix3D< float > > past_key_value
 
+
The documentation for this struct was generated from the following file: +
+ + + + diff --git a/structFp32CLIPEncoder__input-members.html b/structFp32CLIPEncoder__input-members.html new file mode 100644 index 00000000..eb896b97 --- /dev/null +++ b/structFp32CLIPEncoder__input-members.html @@ -0,0 +1,93 @@ + + + + + + + +TinyChatEngine: Member List + + + + + + + + + + + +
+
+ + + + + + +
+
TinyChatEngine +
+
+
+ + + + + + + + +
+
+ + +
+
+
+
+
+
Loading...
+
Searching...
+
No Matches
+
+
+
+
+ +
+
+
Fp32CLIPEncoder_input Member List
+
+
+ +

This is the complete list of members for Fp32CLIPEncoder_input, including all inherited members.

+ + + + + + + + +
attention_mask (defined in Fp32CLIPEncoder_input)Fp32CLIPEncoder_input
Fp32CLIPEncoder_input(Matrix3D< float > hidden_states_, Matrix3D< float > attention_mask_) (defined in Fp32CLIPEncoder_input)Fp32CLIPEncoder_inputinline
Fp32CLIPEncoder_input(Matrix3D< float > hidden_states_, Matrix3D< float > attention_mask_, std::vector< Matrix3D< float > > past_keys_, std::vector< Matrix3D< float > > past_values_) (defined in Fp32CLIPEncoder_input)Fp32CLIPEncoder_inputinline
has_past_keys_values (defined in Fp32CLIPEncoder_input)Fp32CLIPEncoder_input
hidden_states (defined in Fp32CLIPEncoder_input)Fp32CLIPEncoder_input
past_keys (defined in Fp32CLIPEncoder_input)Fp32CLIPEncoder_input
past_values (defined in Fp32CLIPEncoder_input)Fp32CLIPEncoder_input
+ + + + diff --git a/structFp32CLIPEncoder__input.html b/structFp32CLIPEncoder__input.html new file mode 100644 index 00000000..5d5baff6 --- /dev/null +++ b/structFp32CLIPEncoder__input.html @@ -0,0 +1,118 @@ + + + + + + + +TinyChatEngine: Fp32CLIPEncoder_input Struct Reference + + + + + + + + + + + +
+
+ + + + + + +
+
TinyChatEngine +
+
+
+ + + + + + + + +
+
+ + +
+
+
+
+
+
Loading...
+
Searching...
+
No Matches
+
+
+
+
+ +
+
+ +
Fp32CLIPEncoder_input Struct Reference
+
+
+ + + + + + +

+Public Member Functions

Fp32CLIPEncoder_input (Matrix3D< float > hidden_states_, Matrix3D< float > attention_mask_)
 
Fp32CLIPEncoder_input (Matrix3D< float > hidden_states_, Matrix3D< float > attention_mask_, std::vector< Matrix3D< float > > past_keys_, std::vector< Matrix3D< float > > past_values_)
 
+ + + + + + + + + + + +

+Public Attributes

+Matrix3D< floathidden_states
 
+Matrix3D< floatattention_mask
 
+std::vector< Matrix3D< float > > past_keys
 
+std::vector< Matrix3D< float > > past_values
 
+bool has_past_keys_values
 
+
The documentation for this struct was generated from the following file: +
+ + + + diff --git a/structFp32CLIPEncoder__output-members.html b/structFp32CLIPEncoder__output-members.html new file mode 100644 index 00000000..e6b08c5e --- /dev/null +++ b/structFp32CLIPEncoder__output-members.html @@ -0,0 +1,89 @@ + + + + + + + +TinyChatEngine: Member List + + + + + + + + + + + +
+
+ + + + + + +
+
TinyChatEngine +
+
+
+ + + + + + + + +
+
+ + +
+
+
+
+
+
Loading...
+
Searching...
+
No Matches
+
+
+
+
+ +
+
+
Fp32CLIPEncoder_output Member List
+
+
+ +

This is the complete list of members for Fp32CLIPEncoder_output, including all inherited members.

+ + + + +
last_hidden_state (defined in Fp32CLIPEncoder_output)Fp32CLIPEncoder_output
past_keys (defined in Fp32CLIPEncoder_output)Fp32CLIPEncoder_output
past_values (defined in Fp32CLIPEncoder_output)Fp32CLIPEncoder_output
+ + + + diff --git a/structFp32CLIPEncoder__output.html b/structFp32CLIPEncoder__output.html new file mode 100644 index 00000000..c52ffbb6 --- /dev/null +++ b/structFp32CLIPEncoder__output.html @@ -0,0 +1,102 @@ + + + + + + + +TinyChatEngine: Fp32CLIPEncoder_output Struct Reference + + + + + + + + + + + +
+
+ + + + + + +
+
TinyChatEngine +
+
+
+ + + + + + + + +
+
+ + +
+
+
+
+
+
Loading...
+
Searching...
+
No Matches
+
+
+
+
+ +
+
+ +
Fp32CLIPEncoder_output Struct Reference
+
+
+ + + + + + + + +

+Public Attributes

+Matrix3D< floatlast_hidden_state
 
+std::vector< Matrix3D< float > > past_keys
 
+std::vector< Matrix3D< float > > past_values
 
+
The documentation for this struct was generated from the following file: +
+ + + + diff --git a/structFp32CLIPVisionTransformer__input-members.html b/structFp32CLIPVisionTransformer__input-members.html new file mode 100644 index 00000000..0cb0a89e --- /dev/null +++ b/structFp32CLIPVisionTransformer__input-members.html @@ -0,0 +1,93 @@ + + + + + + + +TinyChatEngine: Member List + + + + + + + + + + + +
+
+ + + + + + +
+
TinyChatEngine +
+
+
+ + + + + + + + +
+
+ + +
+
+
+
+
+
Loading...
+
Searching...
+
No Matches
+
+
+
+
+ +
+
+
Fp32CLIPVisionTransformer_input Member List
+
+
+ +

This is the complete list of members for Fp32CLIPVisionTransformer_input, including all inherited members.

+ + + + + + + + +
Fp32CLIPVisionTransformer_input() (defined in Fp32CLIPVisionTransformer_input)Fp32CLIPVisionTransformer_inputinline
Fp32CLIPVisionTransformer_input(Matrix3D< float > input_image_) (defined in Fp32CLIPVisionTransformer_input)Fp32CLIPVisionTransformer_inputinline
Fp32CLIPVisionTransformer_input(Matrix3D< float > input_image_, std::vector< Matrix3D< float > > past_keys_, std::vector< Matrix3D< float > > past_values_) (defined in Fp32CLIPVisionTransformer_input)Fp32CLIPVisionTransformer_inputinline
has_past_keys_values (defined in Fp32CLIPVisionTransformer_input)Fp32CLIPVisionTransformer_input
input_image (defined in Fp32CLIPVisionTransformer_input)Fp32CLIPVisionTransformer_input
past_keys (defined in Fp32CLIPVisionTransformer_input)Fp32CLIPVisionTransformer_input
past_values (defined in Fp32CLIPVisionTransformer_input)Fp32CLIPVisionTransformer_input
+ + + + diff --git a/structFp32CLIPVisionTransformer__input.html b/structFp32CLIPVisionTransformer__input.html new file mode 100644 index 00000000..f3c39f56 --- /dev/null +++ b/structFp32CLIPVisionTransformer__input.html @@ -0,0 +1,115 @@ + + + + + + + +TinyChatEngine: Fp32CLIPVisionTransformer_input Struct Reference + + + + + + + + + + + +
+
+ + + + + + +
+
TinyChatEngine +
+
+
+ + + + + + + + +
+
+ + +
+
+
+
+
+
Loading...
+
Searching...
+
No Matches
+
+
+
+
+ +
+
+ +
Fp32CLIPVisionTransformer_input Struct Reference
+
+
+ + + + + + +

+Public Member Functions

Fp32CLIPVisionTransformer_input (Matrix3D< float > input_image_)
 
Fp32CLIPVisionTransformer_input (Matrix3D< float > input_image_, std::vector< Matrix3D< float > > past_keys_, std::vector< Matrix3D< float > > past_values_)
 
+ + + + + + + + + +

+Public Attributes

+Matrix3D< floatinput_image
 
+std::vector< Matrix3D< float > > past_keys
 
+std::vector< Matrix3D< float > > past_values
 
+bool has_past_keys_values
 
+
The documentation for this struct was generated from the following file: +
+ + + + diff --git a/structFp32CLIPVisionTransformer__output-members.html b/structFp32CLIPVisionTransformer__output-members.html new file mode 100644 index 00000000..44d145ab --- /dev/null +++ b/structFp32CLIPVisionTransformer__output-members.html @@ -0,0 +1,89 @@ + + + + + + + +TinyChatEngine: Member List + + + + + + + + + + + +
+
+ + + + + + +
+
TinyChatEngine +
+
+
+ + + + + + + + +
+
+ + +
+
+
+
+
+
Loading...
+
Searching...
+
No Matches
+
+
+
+
+ +
+
+
Fp32CLIPVisionTransformer_output Member List
+
+
+ +

This is the complete list of members for Fp32CLIPVisionTransformer_output, including all inherited members.

+ + + + +
last_hidden_state (defined in Fp32CLIPVisionTransformer_output)Fp32CLIPVisionTransformer_output
past_keys (defined in Fp32CLIPVisionTransformer_output)Fp32CLIPVisionTransformer_output
past_values (defined in Fp32CLIPVisionTransformer_output)Fp32CLIPVisionTransformer_output
+ + + + diff --git a/structFp32CLIPVisionTransformer__output.html b/structFp32CLIPVisionTransformer__output.html new file mode 100644 index 00000000..e0c7523d --- /dev/null +++ b/structFp32CLIPVisionTransformer__output.html @@ -0,0 +1,102 @@ + + + + + + + +TinyChatEngine: Fp32CLIPVisionTransformer_output Struct Reference + + + + + + + + + + + +
+
+ + + + + + +
+
TinyChatEngine +
+
+
+ + + + + + + + +
+
+ + +
+
+
+
+
+
Loading...
+
Searching...
+
No Matches
+
+
+
+
+ +
+
+ +
Fp32CLIPVisionTransformer_output Struct Reference
+
+
+ + + + + + + + +

+Public Attributes

+Matrix3D< floatlast_hidden_state
 
+std::vector< Matrix3D< float > > past_keys
 
+std::vector< Matrix3D< float > > past_values
 
+
The documentation for this struct was generated from the following file: +
+ + + + diff --git a/structFp32LlamaForCausalLM__input-members.html b/structFp32LlamaForCausalLM__input-members.html index 3830108d..8feca07e 100644 --- a/structFp32LlamaForCausalLM__input-members.html +++ b/structFp32LlamaForCausalLM__input-members.html @@ -80,10 +80,15 @@ Fp32LlamaForCausalLM_input() (defined in Fp32LlamaForCausalLM_input)Fp32LlamaForCausalLM_inputinline Fp32LlamaForCausalLM_input(Matrix3D< int > input_ids_) (defined in Fp32LlamaForCausalLM_input)Fp32LlamaForCausalLM_inputinline Fp32LlamaForCausalLM_input(Matrix3D< int > input_ids_, std::vector< Matrix3D< float > > past_keys_, std::vector< Matrix3D< float > > past_values_) (defined in Fp32LlamaForCausalLM_input)Fp32LlamaForCausalLM_inputinline + Fp32LlamaForCausalLM_input(Matrix3D< int > input_ids_, Matrix3D< float > image_embed_, Matrix3D< int > second_input_ids_) (defined in Fp32LlamaForCausalLM_input)Fp32LlamaForCausalLM_inputinline + Fp32LlamaForCausalLM_input(Matrix3D< int > input_ids_, Matrix3D< float > image_embed_) (defined in Fp32LlamaForCausalLM_input)Fp32LlamaForCausalLM_inputinline has_past_keys_values (defined in Fp32LlamaForCausalLM_input)Fp32LlamaForCausalLM_input - input_ids (defined in Fp32LlamaForCausalLM_input)Fp32LlamaForCausalLM_input + image_embed (defined in Fp32LlamaForCausalLM_input)Fp32LlamaForCausalLM_input + input_ids (defined in Fp32LlamaForCausalLM_input)Fp32LlamaForCausalLM_input + is_llava (defined in Fp32LlamaForCausalLM_input)Fp32LlamaForCausalLM_input past_keys (defined in Fp32LlamaForCausalLM_input)Fp32LlamaForCausalLM_input past_values (defined in Fp32LlamaForCausalLM_input)Fp32LlamaForCausalLM_input + second_input_ids (defined in Fp32LlamaForCausalLM_input)Fp32LlamaForCausalLM_input