[Bugfix] Python package does not have __version__ (#104)
Also fixes some formatting issues in Python docstrings.
yzh119 authored Feb 1, 2024
1 parent 0bedda7 commit c55cd60
Showing 5 changed files with 25 additions and 19 deletions.
6 changes: 6 additions & 0 deletions python/flashinfer/__init__.py
@@ -34,3 +34,9 @@
BatchPrefillWithSharedPrefixPagedKVCacheWrapper,
)
from .page import append_paged_kv_cache

try:
    from ._build_meta import __version__ as __version__
except ImportError:
    with open("version.txt") as f:
        __version__ = f.read().strip()
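
For context: `_build_meta.py` is not checked into the repository; a build step typically generates it so that installed packages do not depend on `version.txt` being present at runtime. A minimal sketch of such a generation step (hypothetical; the actual build script is not part of this diff):

    # Hypothetical build-time step (not part of this commit): bake the version
    # from version.txt into python/flashinfer/_build_meta.py so the installed
    # package no longer needs version.txt on disk.
    from pathlib import Path

    version = Path("version.txt").read_text().strip()
    Path("python/flashinfer/_build_meta.py").write_text(f'__version__ = "{version}"\n')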
16 changes: 8 additions & 8 deletions python/flashinfer/cascade.py
@@ -117,7 +117,7 @@ def merge_state_in_place(
s_other : torch.Tensor
The other logsumexp value to be merged, expected to be a float32 tensor,
shape: ``(seq_len, num_heads)``.
Example
-------
>>> import torch
@@ -135,7 +135,7 @@ def merge_state_in_place(


def merge_states(v: torch.Tensor, s: torch.Tensor):
r"""Merge multiple attention states (v, s).
r"""Merge multiple attention states (v, s).
Parameters
----------
@@ -154,7 +154,7 @@ def merge_states(v: torch.Tensor, s: torch.Tensor):
S : torch.Tensor
The logsumexp value from the merged KV-segments, shape:
``[seq_len, num_heads]``.
Example
-------
>>> import torch
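
The merge performed by ``merge_states`` is simple to state in pure PyTorch. A reference sketch follows (an illustration, not the library's CUDA kernel; the shapes are assumptions taken from the full docstring: ``v`` is ``(seq_len, num_states, num_heads, head_dim)``, ``s`` is ``(seq_len, num_states, num_heads)`` and holds natural-log logsumexp values):

    import torch

    def merge_states_reference(v: torch.Tensor, s: torch.Tensor):
        # S is the logsumexp over the num_states axis: (seq_len, num_heads)
        S = torch.logsumexp(s, dim=1)
        # Each state contributes with weight exp(s_i - S); the weights sum to 1.
        w = torch.exp(s - S.unsqueeze(1))             # (seq_len, num_states, num_heads)
        V = (w.unsqueeze(-1) * v.float()).sum(dim=1)  # (seq_len, num_heads, head_dim)
        return V.to(v.dtype), S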
@@ -229,7 +229,7 @@ def batch_decode_with_shared_prefix_padded_kv_cache(
-------
V : torch.Tensor
The attention output, shape: ``[batch_size, num_heads, head_dim]``
Example
-------
>>> import torch
@@ -312,7 +312,7 @@ class BatchDecodeWithSharedPrefixPagedKVCacheWrapper:
... )
>>> batch_size = 7
>>> shared_prefix_len = 8192
>>> unique_kv_page_indices = torch.arange(max_num_pages).int().to("cuda:0")
>>> unique_kv_page_indptr = torch.tensor(
... [0, 17, 29, 44, 48, 66, 100, 128], dtype=torch.int32, device="cuda:0"
... )
@@ -355,7 +355,7 @@ class BatchDecodeWithSharedPrefixPagedKVCacheWrapper:
... # compute batch decode attention, reuse auxiliary data structures for all layers
... o = wrapper.forward(q, k_shared, v_shared, unique_kv_data)
... outputs.append(o)
...
>>> # clear auxiliary data structures
>>> wrapper.end_forward()
>>> outputs[0].shape
@@ -547,7 +547,7 @@ class BatchPrefillWithSharedPrefixPagedKVCacheWrapper:
>>> qo_indptr = torch.tensor(
... [0, 33, 44, 55, 66, 77, 88, nnz_qo], dtype=torch.int32, device="cuda:0"
... )
>>> paged_kv_indices = torch.arange(max_num_pages).int().to("cuda:0")
>>> paged_kv_indptr = torch.tensor(
... [0, 17, 29, 44, 48, 66, 100, 128], dtype=torch.int32, device="cuda:0"
... )
@@ -590,7 +590,7 @@ class BatchPrefillWithSharedPrefixPagedKVCacheWrapper:
... q, k_shared, v_shared, kv_data, causal=True
... )
... outputs.append(o)
...
>>> # clear auxiliary data structures
>>> prefill_wrapper.end_forward()
>>> outputs[0].shape
10 changes: 5 additions & 5 deletions python/flashinfer/decode.py
@@ -179,7 +179,7 @@ def batch_decode_with_padded_kv_cache(
-------
torch.Tensor
The attention output, shape: ``[batch_size, num_qo_heads, head_dim]``.
Examples
--------
>>> import torch
@@ -270,7 +270,7 @@ def batch_decode_with_padded_kv_cache_return_lse(
The attention output, shape: [batch_size, num_qo_heads, head_dim]
S : torch.Tensor
The logsumexp of attention scores, Shape: [batch_size, num_qo_heads]
Examples
--------
>>> import torch
@@ -342,7 +342,7 @@ class BatchDecodeWithPagedKVCacheWrapper:
... workspace_buffer, "NHD"
... )
>>> batch_size = 7
>>> kv_page_indices = torch.arange(max_num_pages).int().to("cuda:0")
>>> kv_page_indptr = torch.tensor(
... [0, 17, 29, 44, 48, 66, 100, 128], dtype=torch.int32, device="cuda:0"
... )
@@ -374,7 +374,7 @@ class BatchDecodeWithPagedKVCacheWrapper:
... # compute batch decode attention, reuse auxiliary data structures for all layers
... o = decode_wrapper.forward(q, kv_data)
... outputs.append(o)
...
>>> # clear auxiliary data structures
>>> decode_wrapper.end_forward()
>>> outputs[0].shape
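
As an aside on the example above: ``kv_page_indptr`` is a CSR-style offset array, so the pages owned by request ``i`` are ``kv_page_indices[kv_page_indptr[i]:kv_page_indptr[i+1]]``. A small sanity check on the values used in the docstring:

    import torch

    kv_page_indptr = torch.tensor([0, 17, 29, 44, 48, 66, 100, 128], dtype=torch.int32)
    pages_per_request = kv_page_indptr[1:] - kv_page_indptr[:-1]
    print(pages_per_request.tolist())  # [17, 12, 15, 4, 18, 34, 28]: 7 requests, 128 pages in total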
@@ -589,7 +589,7 @@ def forward_return_lse(
The attention output, shape: ``[batch_size, num_qo_heads, head_dim]``.
S : torch.Tensor
The logsumexp of attention scores, Shape: ``[batch_size, num_qo_heads]``.
Notes
-----
Please refer to the :ref:`tutorial <recursive-attention>` for a detailed
6 changes: 3 additions & 3 deletions python/flashinfer/page.py
@@ -67,7 +67,7 @@ def append_paged_kv_cache(
shape: ``[batch_size]``.
kv_layout : str
The layout of the paged kv-cache, either ``NHD`` or ``HND``.
Example
-------
>>> import torch
@@ -96,7 +96,7 @@ def append_paged_kv_cache(
>>> # 25 = (2 - 1) * 16 + 9
>>> # 22 = (2 - 1) * 16 + 6
>>> kv_last_page_len = torch.tensor([13, 8, 9, 6], dtype=torch.int32, device="cuda:0")
>>>
>>> flashinfer.append_paged_kv_cache(
... k_append,
... v_append,
@@ -111,7 +111,7 @@ def append_paged_kv_cache(
-----
Please refer to the :ref:`tutorial <recursive-attention>` for a detailed
explanation of the log-sum-exp function and attention states.
The function assumes that the space for the appended k/v has already been
allocated, which means :attr:`kv_indices`, :attr:`kv_indptr`, and
:attr:`kv_last_page_len` already incorporate the appended k/v.
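
Following on from the docstring's comments (``25 = (2 - 1) * 16 + 9`` and so on), each request's kv length decomposes as ``kv_len = (num_pages - 1) * page_size + kv_last_page_len`` with ``1 <= kv_last_page_len <= page_size``. A small sketch of that arithmetic (the lengths 45 and 8 are inferred from ``kv_last_page_len = [13, 8, 9, 6]``, so treat them as assumptions):

    page_size = 16
    for kv_len in (45, 8, 25, 22):
        num_pages = -(-kv_len // page_size)                   # ceiling division
        last_page_len = kv_len - (num_pages - 1) * page_size  # entries on the last page
        print(kv_len, num_pages, last_page_len)
    # 45 -> 3 pages, last holds 13; 8 -> 1, 8; 25 -> 2, 9; 22 -> 2, 6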
6 changes: 3 additions & 3 deletions python/flashinfer/prefill.py
@@ -277,7 +277,7 @@ class BatchPrefillWithPagedKVCacheWrapper:
>>> qo_indptr = torch.tensor(
... [0, 33, 44, 55, 66, 77, 88, nnz_qo], dtype=torch.int32, device="cuda:0"
... )
>>> paged_kv_indices = torch.arange(max_num_pages).int().to("cuda:0")
>>> paged_kv_indptr = torch.tensor(
... [0, 17, 29, 44, 48, 66, 100, 128], dtype=torch.int32, device="cuda:0"
... )
@@ -308,7 +308,7 @@ class BatchPrefillWithPagedKVCacheWrapper:
... q, kv_data, causal=True
... )
... outputs.append(o)
...
>>> # clear auxiliary data structures
>>> prefill_wrapper.end_forward()
>>> outputs[0].shape
@@ -582,7 +582,7 @@ class BatchPrefillWithRaggedKVCacheWrapper:
... q, k, v, causal=True
... )
... outputs.append(o)
...
>>> # clear auxiliary data structures
>>> prefill_wrapper.end_forward()
>>> outputs[0].shape
