Skip to content

Commit

Permalink
falcon : minor
Browse files Browse the repository at this point in the history
  • Loading branch information
ggerganov committed Aug 22, 2023
1 parent 2d58444 commit 0ec27ad
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 17 deletions.
4 changes: 2 additions & 2 deletions ggml-alloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ static void ggml_allocator_free_tensor(struct ggml_allocr * alloc, struct ggml_t
alloc->n_free_blocks++;
}

void ggml_allocr_set_parse_seq(struct ggml_allocr * alloc, int * list, int n) {
void ggml_allocr_set_parse_seq(struct ggml_allocr * alloc, const int * list, int n) {
int pos = 0;
for (int i = 0; i < n; i++) {
if (list[i] != -1) {
Expand Down Expand Up @@ -547,7 +547,7 @@ static size_t ggml_allocator_alloc_graph_tensors_n(
struct ggml_tensor * view_src = get_view_source(parent);
struct hash_node * view_src_hn = hash_get(ht, view_src);
view_src_hn->n_views -= 1;
AT_PRINTF("view_src %s: %d children, %d views\n", view_src->name, view_src->n_children, view_src->n_views);
AT_PRINTF("view_src %s\n", view_src->name);
if (view_src_hn->n_views == 0 && view_src_hn->n_children == 0 && view_src->data != node->data) {
ggml_allocator_free_tensor(alloc, view_src);
}
Expand Down
2 changes: 1 addition & 1 deletion ggml-alloc.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ GGML_API struct ggml_allocr * ggml_allocr_new_measure(size_t alignment);

// tell the allocator to parse nodes following the order described in the list
// you should call this if your graph is optimized to execute out-of-order
GGML_API void ggml_allocr_set_parse_seq(struct ggml_allocr * alloc, int * list, int n);
GGML_API void ggml_allocr_set_parse_seq(struct ggml_allocr * alloc, const int * list, int n);

GGML_API void ggml_allocr_free(struct ggml_allocr * alloc);
GGML_API bool ggml_allocr_is_measure(struct ggml_allocr * alloc);
Expand Down
26 changes: 12 additions & 14 deletions llama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2436,7 +2436,7 @@ static struct ggml_cgraph * llm_build_falcon(
attn_norm),
ggml_repeat(ctx0, model.layers[il].attn_norm_b, attn_norm));

if (hparams.n_head_kv == 8) { // Falcon-40B
if (model.layers[il].attn_norm_2) { // Falcon-40B
cur = ggml_norm(ctx0, inpL);

cur = ggml_add(ctx0,
Expand All @@ -2461,23 +2461,25 @@ static struct ggml_cgraph * llm_build_falcon(
// trickery when trying to accurately dump these views for
// debugging.

const size_t wsize = ggml_type_size(cur->type);

struct ggml_tensor * Qcur = ggml_view_3d(
ctx0, cur, n_embd_head, n_head, N,
n_embd_head * ggml_type_size(GGML_TYPE_F32),
n_embd_head * (n_head + 2 * n_head_kv) * ggml_type_size(GGML_TYPE_F32),
wsize * n_embd_head,
wsize * n_embd_head * (n_head + 2 * n_head_kv),
0);

struct ggml_tensor * Kcur = ggml_view_3d(
ctx0, cur, n_embd_head, n_head_kv, N,
n_embd_head * ggml_type_size(GGML_TYPE_F32),
n_embd_head * (n_head + 2 * n_head_kv) * ggml_type_size(GGML_TYPE_F32),
n_embd_head * n_head * ggml_type_size(GGML_TYPE_F32));
wsize * n_embd_head,
wsize * n_embd_head * (n_head + 2 * n_head_kv),
wsize * n_embd_head * n_head);

struct ggml_tensor * Vcur = ggml_view_3d(
ctx0, cur, n_embd_head, n_head_kv, N,
n_embd_head * ggml_type_size(GGML_TYPE_F32),
n_embd_head * (n_head + 2 * n_head_kv) * ggml_type_size(GGML_TYPE_F32),
n_embd_head * (n_head + n_head_kv) * ggml_type_size(GGML_TYPE_F32));
wsize * n_embd_head,
wsize * n_embd_head * (n_head + 2 * n_head_kv),
wsize * n_embd_head * (n_head + n_head_kv));

// using mode = 2 for neox mode
Qcur = ggml_rope_inplace(ctx0, Qcur, n_past, n_embd_head, 2, 0);
Expand Down Expand Up @@ -2518,11 +2520,7 @@ static struct ggml_cgraph * llm_build_falcon(
struct ggml_tensor * KQ = ggml_mul_mat(ctx0, K, Q);

// KQ_scaled = KQ / sqrt(n_embd/n_head)
struct ggml_tensor * KQ_scaled =
ggml_scale_inplace(ctx0,
KQ,
ggml_new_f32(ctx0, 1.0f/sqrt(float(n_embd_head)))
);
struct ggml_tensor * KQ_scaled = ggml_scale_inplace(ctx0, KQ, KQ_scale);

// KQ_masked = mask_past(KQ_scaled)
struct ggml_tensor * KQ_masked = ggml_diag_mask_inf_inplace(ctx0, KQ_scaled, n_past);
Expand Down

0 comments on commit 0ec27ad

Please sign in to comment.