-
Notifications
You must be signed in to change notification settings - Fork 79
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
ab90cc4
commit c3b94c7
Showing
29 changed files
with
2,310 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,6 +15,8 @@ | |
|
||
assets/ | ||
*.bin | ||
!llama_vocab.bin | ||
!starcoder_vocab.bin | ||
*.zip | ||
*.txt | ||
!requirements.txt | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
/* | ||
Adapted from llama.cpp and starcoder.cpp: | ||
https://github.com/ggerganov/llama.cpp | ||
https://github.com/bigcode-project/starcoder.cpp | ||
*/ | ||
|
||
#ifndef GPTBIGCODE_TOKENIZER_H | ||
#define GPTBIGCODE_TOKENIZER_H | ||
|
||
#include <cstdint> | ||
#include <cstdio> | ||
#include <iostream> | ||
#include <map> | ||
#include <queue> | ||
#include <string> | ||
#include <unordered_map> | ||
#include <vector> | ||
#include <random> | ||
#include <thread> | ||
#include <fstream> | ||
|
||
// | ||
// Vocab utils | ||
// | ||
|
||
// Returns a processed copy of `s`; the input is taken by const reference and
// is not modified.
// NOTE(review): the definition is not visible in this header — presumably it
// strips leading/trailing whitespace; confirm against the implementation.
std::string trim(const std::string & s);
|
||
// Returns a new string built from `s`, `from` and `to`; all three inputs are
// taken by const reference and are not modified.
// NOTE(review): the definition is not visible in this header — presumably
// occurrences of `from` in `s` are substituted with `to`; confirm whether all
// occurrences or only the first are replaced.
std::string replace(
    const std::string & s,
    const std::string & from,
    const std::string & to);
|
||
// Vocabulary tables used by the StarCoder tokenizer.
struct starcoder_vocab {
    // Token text -> token id.
    std::map<std::string, int32_t> token_to_id;
    // Token id -> token text.
    std::map<int32_t, std::string> id_to_token;
    // Tokens registered as "special".
    // NOTE(review): presumably populated through add_special_token(); the
    // definition is not visible here.
    std::vector<std::string> special_tokens;

    // Registers `token` as a special token (defined out of line).
    void add_special_token(const std::string & token);
};
|
||
/* | ||
* Tokenizer | ||
*/ | ||
// Builds a starcoder_vocab from `vocab_file` and returns it by value.
// NOTE(review): `vocab_file` is presumably a filesystem path to a serialized
// vocabulary; the file format and the behavior on a missing or malformed file
// are not visible in this header — confirm against the definition.
starcoder_vocab starcoder_init_vocab(const std::string & vocab_file);
|
||
// Looks up token `id` in `vocab` and returns its text as a C string.
// NOTE(review): `vocab` is taken by non-const reference; whether the lookup can
// modify the vocabulary (e.g. via map operator[]) is not visible here.
// NOTE(review): the returned pointer presumably aliases storage owned by
// `vocab` and must not outlive or be used across mutations of it — confirm.
const char* starcoder_id_to_token(starcoder_vocab& vocab, int id);
|
||
// Tokenizes `text` with `vocab`, writing token ids into `final_tokens`.
//   vocab         vocabulary to tokenize against (not modified).
//   text          input text.
//   final_tokens  output vector receiving the token ids.
//   n_max_tokens  NOTE(review): presumably an upper bound on the number of
//                 tokens produced — confirm against the definition.
// NOTE(review): the meaning of the int return value (token count vs. status
// code) is not visible in this header — confirm before relying on it.
int starcoder_tokenize(const starcoder_vocab &vocab, const std::string &text, std::vector<int> &final_tokens, int n_max_tokens);
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
#include <utility> | ||
|
||
#include "common.h" | ||
#include "operators.h" | ||
|
||
struct Fp32GPTBigCodeAttention_output { | ||
Matrix3D<float> attn_output; | ||
Matrix3D<float> attn_probs_reshaped; | ||
std::pair<Matrix3D<float>, Matrix3D<float>> past_key_value; | ||
}; | ||
struct Fp32GPTBigCodeAttention_input { | ||
Matrix3D<float> hidden_states; | ||
Matrix3D<float> attention_mask; | ||
Matrix3D<float> past_key, past_value; | ||
bool has_past_key_value = false; | ||
int layer_idx; | ||
|
||
Fp32GPTBigCodeAttention_input(Matrix3D<float> hidden_states_, Matrix3D<float> attention_mask_, int layer_idx_) | ||
: hidden_states(hidden_states_), attention_mask(attention_mask_), layer_idx(layer_idx_) {} | ||
|
||
Fp32GPTBigCodeAttention_input(Matrix3D<float> hidden_states_, Matrix3D<float> attention_mask_, Matrix3D<float> past_key_, | ||
Matrix3D<float> past_value_, bool has_past_key_value_, int layer_idx_) | ||
: hidden_states(hidden_states_), | ||
attention_mask(attention_mask_), | ||
past_key(past_key_), | ||
past_value(past_value_), | ||
has_past_key_value(has_past_key_value_), | ||
layer_idx(layer_idx_) {} | ||
}; | ||
|
||
class Fp32GPTBigCodeAttention { | ||
public: | ||
Fp32GPTBigCodeAttention(std::string param_path, const struct model_config config); | ||
Fp32GPTBigCodeAttention() {} | ||
static void initialized_memory(const struct model_config config); | ||
struct Fp32GPTBigCodeAttention_output forward(const struct Fp32GPTBigCodeAttention_input &input); | ||
|
||
private: | ||
void unshape(Matrix3D<float> shaped, Matrix3D<float> unshape, int sqlen); | ||
void shape_qkv(Matrix3D<float> unshape, Matrix3D<float> shaped_q, Matrix3D<float> shaped_k, | ||
Matrix3D<float> shaped_v, int sqlen); | ||
float scaling; | ||
int embed_dim, num_heads, head_dim, kv_heads, kv_dim; | ||
BMM_F32T qk_bmm, pv_bmm; | ||
Linear_FP c_attn, c_proj; | ||
std::string profile_name = "Fp32GPTBigCodeAttention"; | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
#include <cstdlib>
#include <string>
#include <utility>
#include <vector>
|
||
#include "Fp32GPTBigCodeDecoderLayer.h" | ||
#include "common.h" | ||
#include "operators.h" | ||
|
||
struct Fp32GPTBigCodeDecoder_output { | ||
Matrix3D<float> last_hidden_state; | ||
std::vector<Matrix3D<float>> past_keys, past_values; | ||
}; | ||
struct Fp32GPTBigCodeDecoder_input { | ||
Matrix3D<int> input_ids; | ||
std::vector<Matrix3D<float>> past_keys, past_values; | ||
bool has_past_keys_values; | ||
|
||
Fp32GPTBigCodeDecoder_input(Matrix3D<int> input_ids_) : input_ids(input_ids_) { has_past_keys_values = false; } | ||
Fp32GPTBigCodeDecoder_input(Matrix3D<int> input_ids_, std::vector<Matrix3D<float>> past_keys_, | ||
std::vector<Matrix3D<float>> past_values_) | ||
: input_ids(input_ids_), past_keys(past_keys_), past_values(past_values_) { | ||
has_past_keys_values = true; | ||
} | ||
}; | ||
|
||
class Fp32GPTBigCodeDecoder { | ||
public: | ||
Fp32GPTBigCodeDecoder(std::string param_path, const struct model_config config); | ||
Fp32GPTBigCodeDecoder(){}; | ||
Matrix3D<float> prepare_decoder_attention_mask(int length, int past_length); | ||
Matrix3D<float> get_position_embed(int sql_length, int past_length); | ||
struct Fp32GPTBigCodeDecoder_output forward(const struct Fp32GPTBigCodeDecoder_input& input); | ||
Embedding wte, wpe; | ||
int voc_size, embed_dim, padding_idx, hidden_dim, num_heads, max_position_embeddings; | ||
std::vector<Fp32GPTBigCodeDecoderLayer> layers; | ||
LayerNorm ln_f; | ||
std::string profile_name = "Fp32GPTBigCodeDecoder"; | ||
|
||
private: | ||
float* attention_mask_buf; | ||
float* pos_embeds_buf; | ||
float* last_hidden_states_buf; | ||
float* hidden_states_buf; | ||
}; |
Oops, something went wrong.