Commit
doc: fix docstring of pos_encoding_mode field (#148)
Fixes a minor formatting issue in the docstring.
yzh119 authored Mar 3, 2024
1 parent bf2117b commit de129b9
Showing 2 changed files with 12 additions and 12 deletions.
12 changes: 6 additions & 6 deletions python/flashinfer/decode.py
@@ -79,7 +79,7 @@ def single_decode_with_kv_cache(
The layout of the input k/v tensors, could be either ``NHD`` or ``HND``.
pos_encoding_mode : str
Whether to apply RoPE on-the-fly inside attention kernels, could be
- ``NONE``/``ROPE_LLAMA``(LLAMA style rotary embedding)/``ALIBI``.
+ ``NONE``/``ROPE_LLAMA`` (LLAMA style rotary embedding) /``ALIBI``.
sm_scale : Optional[float]
The scale of softmax, if not provided, will be set to ``1 / sqrt(head_dim)``.
rope_scale : Optional[float]
@@ -168,7 +168,7 @@ def batch_decode_with_padded_kv_cache(
The layout of the input k/v tensors, could be either ``NHD`` or ``HND``.
pos_encoding_mode : str
Whether to apply RoPE on-the-fly inside attention kernels, could be
- ``NONE``/``ROPE_LLAMA``(LLAMA style rotary embedding)/``ALIBI``.
+ ``NONE``/``ROPE_LLAMA`` (LLAMA style rotary embedding) /``ALIBI``.
sm_scale : Optional[float]
The scale of softmax, if not provided, will be set to ``1 / sqrt(head_dim)``.
rope_scale : Optional[float]
@@ -257,7 +257,7 @@ def batch_decode_with_padded_kv_cache_return_lse(
The layout of the input k/v tensors, could be either ``NHD`` or ``HND``.
pos_encoding_mode : str
Whether to apply RoPE on-the-fly inside attention kernels, could be
- ``NONE``/``ROPE_LLAMA``(LLAMA style rotary embedding)/``ALIBI``.
+ ``NONE``/``ROPE_LLAMA`` (LLAMA style rotary embedding) /``ALIBI``.
sm_scale : Optional[float]
The scale of softmax, if not provided, will be set to ``1 / sqrt(head_dim)``.
rope_scale : Optional[float]
@@ -456,7 +456,7 @@ def begin_forward(
The page size of the paged kv cache
pos_encoding_mode : str
Whether to apply RoPE on-the-fly inside attention kernels, could be
- ``NONE``/``ROPE_LLAMA``(LLAMA style rotary embedding)/``ALIBI``.
+ ``NONE``/``ROPE_LLAMA`` (LLAMA style rotary embedding) /``ALIBI``.
data_type : Union[str, torch.dtype]
The data type of the paged kv cache
@@ -525,7 +525,7 @@ def forward(
:attr:`kv_layout` is ``HND``.
pos_encoding_mode : str
Whether to apply RoPE on-the-fly inside attention kernels, could be
- ``NONE``/``ROPE_LLAMA``(LLAMA style rotary embedding)/``ALIBI``.
+ ``NONE``/``ROPE_LLAMA`` (LLAMA style rotary embedding) /``ALIBI``.
sm_scale : Optional[float]
The scale of softmax, if not provided, will be set to ``1 / sqrt(head_dim)``.
rope_scale : Optional[float]
@@ -586,7 +586,7 @@ def forward_return_lse(
:attr:`kv_layout` is ``HND``.
pos_encoding_mode : str
Whether to apply RoPE on-the-fly inside attention kernels, could be
- ``NONE``/``ROPE_LLAMA``(LLAMA style rotary embedding)/``ALIBI``.
+ ``NONE``/``ROPE_LLAMA`` (LLAMA style rotary embedding) /``ALIBI``.
sm_scale : Optional[float]
The scale of softmax, if not provided, will be set to ``1 / sqrt(head_dim)``.
rope_scale : Optional[float]
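
For reference, the field this commit documents is the ``pos_encoding_mode`` argument of the decode kernels above. Below is a minimal usage sketch for ``single_decode_with_kv_cache``; the tensor shapes (``NHD`` layout, i.e. ``[kv_len, num_kv_heads, head_dim]`` for k/v and ``[num_qo_heads, head_dim]`` for the query) and the half-precision CUDA tensors are assumptions based on the surrounding docstrings, not something this diff shows.

import torch
import flashinfer

num_qo_heads, num_kv_heads, head_dim, kv_len = 32, 32, 128, 2048

# Single-request decode: one query token attending to a dense KV cache.
# Shapes assume the default ``NHD`` layout described in the docstring.
q = torch.randn(num_qo_heads, head_dim, dtype=torch.float16, device="cuda")
k = torch.randn(kv_len, num_kv_heads, head_dim, dtype=torch.float16, device="cuda")
v = torch.randn(kv_len, num_kv_heads, head_dim, dtype=torch.float16, device="cuda")

# pos_encoding_mode selects how positions are handled inside the kernel:
#   "NONE"       - no positional encoding applied by the kernel
#   "ROPE_LLAMA" - LLaMA-style rotary embedding applied on the fly
#   "ALIBI"      - ALiBi bias added to the attention scores
o = flashinfer.single_decode_with_kv_cache(
    q, k, v, kv_layout="NHD", pos_encoding_mode="ROPE_LLAMA"
)
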
12 changes: 6 additions & 6 deletions python/flashinfer/prefill.py
@@ -93,7 +93,7 @@ def single_prefill_with_kv_cache(
The layout of the input k/v tensors, could be either ``NHD`` or ``HND``.
pos_encoding_mode : str
Whether to apply RoPE on-the-fly inside attention kernels, could be
- ``NONE``/``ROPE_LLAMA``(LLAMA style rotary embedding)/``ALIBI``.
+ ``NONE``/``ROPE_LLAMA`` (LLAMA style rotary embedding) /``ALIBI``.
allow_fp16_qk_reduction : bool
Whether to use f16 for qk reduction (faster at the cost of slight precision
loss).
@@ -191,7 +191,7 @@ def single_prefill_with_kv_cache_return_lse(
The layout of the input k/v tensors, could be either ``NHD`` or ``HND``.
pos_encoding_mode : str
Whether to apply RoPE on-the-fly inside attention kernels, could be
- ``NONE``/``ROPE_LLAMA``(LLAMA style rotary embedding)/``ALIBI``.
+ ``NONE``/``ROPE_LLAMA`` (LLAMA style rotary embedding) /``ALIBI``.
allow_fp16_qk_reduction : bool
Whether to use f16 for qk reduction (faster at the cost of slight precision
loss).
@@ -460,7 +460,7 @@ def forward(
Whether to apply causal mask to the attention matrix.
pos_encoding_mode : str
Whether to apply RoPE on-the-fly inside attention kernels, could be
- ``NONE``/``ROPE_LLAMA``(LLAMA style rotary embedding)/``ALIBI``.
+ ``NONE``/``ROPE_LLAMA`` (LLAMA style rotary embedding) /``ALIBI``.
allow_fp16_qk_reduction : bool
Whether to use f16 for qk reduction (faster at the cost of slight precision
loss).
@@ -529,7 +529,7 @@ def forward_return_lse(
Whether to apply causal mask to the attention matrix.
pos_encoding_mode : str
Whether to apply RoPE on-the-fly inside attention kernels, could be
- ``NONE``/``ROPE_LLAMA``(LLAMA style rotary embedding)/``ALIBI``.
+ ``NONE``/``ROPE_LLAMA`` (LLAMA style rotary embedding) /``ALIBI``.
allow_fp16_qk_reduction : bool
Whether to use f16 for qk reduction (faster at the cost of slight precision
loss).
@@ -744,7 +744,7 @@ def forward(
Whether to apply causal mask to the attention matrix.
pos_encoding_mode : str
Whether to apply RoPE on-the-fly inside attention kernels, could be
- ``NONE``/``ROPE_LLAMA``(LLAMA style rotary embedding)/``ALIBI``.
+ ``NONE``/``ROPE_LLAMA`` (LLAMA style rotary embedding) /``ALIBI``.
allow_fp16_qk_reduction : bool
Whether to use f16 for qk reduction (faster at the cost of slight precision
loss).
@@ -811,7 +811,7 @@ def forward_return_lse(
Whether to apply causal mask to the attention matrix.
pos_encoding_mode : str
Whether to apply RoPE on-the-fly inside attention kernels, could be
- ``NONE``/``ROPE_LLAMA``(LLAMA style rotary embedding)/``ALIBI``.
+ ``NONE``/``ROPE_LLAMA`` (LLAMA style rotary embedding) /``ALIBI``.
allow_fp16_qk_reduction : bool
Whether to use f16 for qk reduction (faster at the cost of slight precision
loss).
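
The prefill/append kernels take the same ``pos_encoding_mode`` field. A comparable sketch for ``single_prefill_with_kv_cache`` follows, again assuming ``NHD``-layout fp16 CUDA tensors (``[qo_len, num_qo_heads, head_dim]`` for the query); the shapes are illustrative assumptions, not taken from this diff.

import torch
import flashinfer

qo_len, kv_len, num_qo_heads, num_kv_heads, head_dim = 128, 2048, 32, 32, 128

q = torch.randn(qo_len, num_qo_heads, head_dim, dtype=torch.float16, device="cuda")
k = torch.randn(kv_len, num_kv_heads, head_dim, dtype=torch.float16, device="cuda")
v = torch.randn(kv_len, num_kv_heads, head_dim, dtype=torch.float16, device="cuda")

# Apply LLaMA-style RoPE inside the kernel; allow_fp16_qk_reduction is the
# speed/precision trade-off the docstring above describes, left off here.
o = flashinfer.single_prefill_with_kv_cache(
    q,
    k,
    v,
    causal=True,
    pos_encoding_mode="ROPE_LLAMA",
    allow_fp16_qk_reduction=False,
)
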
