Skip to content

Commit

Permalink
Merge pull request #24 from JarKz/utf8support
Browse files Browse the repository at this point in the history
feat!: add utf8 support
  • Loading branch information
chrisgrieser authored Dec 21, 2023
2 parents 63ff493 + dd9f775 commit b44e256
Show file tree
Hide file tree
Showing 2 changed files with 109 additions and 41 deletions.
27 changes: 27 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,9 @@ call.

-- packer
use { "chrisgrieser/nvim-spider" }

-- lazy.nvim
{ "chrisgrieser/nvim-spider", lazy = true },
```

No keybindings are created by default. Below are the mappings to replace the
Expand Down Expand Up @@ -127,6 +130,30 @@ vim.keymap.set(
> using `function() require("spider").motion("w") end` as third argument of
> the keymap, dot-repeatability will not work.
## UTF-8 support

To add UTF-8 support for matching non-ASCII text, install the `luautf8` rock. With packer.nvim, use the `rocks` option. With lazy.nvim, add [nvim_rocks](https://github.com/theHamsta/nvim_rocks) as a dependency, as shown in the example below.

```lua
-- packer
{ "chrisgrieser/nvim-spider", rocks = "luautf8" }

-- lazy.nvim
{
"chrisgrieser/nvim-spider",
lazy = true,
dependencies = {
"theHamsta/nvim_rocks",
event = "VeryLazy",
build = "pip3 install --user hererocks && python3 -mhererocks . -j2.1.0-beta3 -r3.0.0 && cp nvim_rocks.lua lua",
config = function()
local rocks = require("nvim_rocks")
rocks.ensure_installed("luautf8")
end,
}
},
```

## Configuration
The `.setup()` call is optional.

Expand Down
123 changes: 82 additions & 41 deletions lua/spider/init.lua
Original file line number Diff line number Diff line change
@@ -1,6 +1,41 @@
local M = {}
local patternVariants = require("spider.pattern-variants")

-- String helpers used by the motion logic. These defaults operate on bytes
-- via the standard `string` library, so positions are byte positions.
local str_func = {}

str_func.reverse = string.reverse
str_func.find = string.find
str_func.gmatch = string.gmatch
str_func.len = string.len

---Converts a 0-based column into the 1-based position used for searching.
---@param _ string unused in the byte-based variant
---@param col number 0-based column
---@return number position 1-based position to search from
---@return number startPosition same value, kept so callers can detect "did not move"
function str_func.init_pos(_, col)
	local position = col + 1 -- from 0-based indexing to 1-based
	return position, position
end

---Maps a position back to a byte position; the identity for byte-based strings.
---@param _ string unused in the byte-based variant
---@param pos number
---@return number
function str_func.offset(_, pos)
	return pos
end

-- `lua-utf8` is optional: `pcall` keeps the plugin working without it, in
-- which case `str_func` keeps its byte-based defaults.
local ok, utf8 = pcall(require, "lua-utf8")

if ok then
	-- Swap in the utf8 implementation for every helper that `lua-utf8` also
	-- provides under the same name. Only existing keys are reassigned, which
	-- is safe while iterating with `pairs`.
	for name in pairs(str_func) do
		local utf8Variant = utf8[name]
		if utf8Variant then str_func[name] = utf8Variant end
	end

	---Converts a 0-based column into a 1-based character index.
	---NOTE(review): assumes `utf8.codes` yields 1-based byte positions, as in
	---the Lua 5.3 `utf8` API that luautf8 mirrors -- confirm against its docs.
	---@param s string the line the cursor is on
	---@param col number 0-based column
	---@return number charIndex 1-based index counted in characters
	---@return number startCharIndex same value, returned as the starting position
	str_func.init_pos = function(s, col)
		local charIndex = 1
		for bytePos in utf8.codes(s) do
			-- stop at the first character that starts after `col`
			if bytePos > col then break end
			charIndex = charIndex + 1
		end
		return charIndex, charIndex
	end
end

--------------------------------------------------------------------------------
-- CONFIG
---@class (exact) optsObj
Expand Down Expand Up @@ -35,18 +70,20 @@ end
---@param line string
---@param pattern string
---@param endOfWord boolean look for the end of the pattern instead of the start
---@param col number look for the first match after this number
---@param offset number -- look for the first match after this number
---@nodiscard
---@return number|nil -- returns nil if none is found
local function firstMatchAfter(line, pattern, endOfWord, col)
---@return number|nil returns nil if none is found
local function firstMatchAfter(line, pattern, endOfWord, offset)
-- special case: pattern with ^/$, since there can only be one match
-- and since gmatch won't work with them
if pattern:find("^%^") or pattern:find("%$$") then
if pattern:find("%$$") and col >= #line then return nil end -- checking for high col count for virtualedit
if pattern:find("^%^") and col ~= 1 then return nil end
local start, endPos = line:find(pattern)
if pattern:find("%$$") and offset > str_func.len(line) then return nil end -- checking for high col count for virtualedit
if pattern:find("^%^") and offset ~= 0 then return nil end

local start, endPos = str_func.find(line, pattern)
if start == nil or endPos == nil then return nil end

local pos = endOfWord and endPos or start
if pos and not endOfWord then pos = pos - 1 end
return pos
end

Expand All @@ -56,47 +93,46 @@ local function firstMatchAfter(line, pattern, endOfWord, col)
pattern = "()" .. pattern
end
-- `:gmatch` will return all locations in the string where the pattern is
-- found, the loop looks for the first one that is higher than the col to
-- look from
for pos in line:gmatch(pattern) do
if endOfWord and pos > col then return pos - 1 end
if not endOfWord and pos >= col then return pos - 1 end
-- found, the loop looks for the first one that is higher than the offset
-- to look from
for pos in str_func.gmatch(line, pattern) do
if type(pos) == "string" then return nil end

if endOfWord then pos = pos - 1 end
if pos > offset then return pos end
end
return nil
end

---@param line string input string where to find the pattern
---@param col number position to start looking from
---@param offset number position to start looking from
---@param key "w"|"e"|"b"|"ge" the motion to perform
---@param opts optsObj configuration table as in setup()
---@nodiscard
---@return number|nil pattern position, returns nil if no pattern was found
local function getNextPosition(line, col, key, opts)
local function getNextPosition(line, offset, key, opts)
local endOfWord = (key == "ge") or (key == "e")
local backwards = (key == "b") or (key == "ge")
local patterns = patternVariants.get(opts, backwards)

if backwards then
line = line:reverse()
line = str_func.reverse(line)
endOfWord = not endOfWord
if col == -1 then
col = 1
else
col = #line - col + 1
end

local isSameLine = offset ~= 0
if isSameLine then offset = str_func.len(line) - offset + 1 end
end

-- search for patterns, get closest one
local matches = {}
for _, pattern in pairs(patterns) do
local match = firstMatchAfter(line, pattern, endOfWord, col)
local match = firstMatchAfter(line, pattern, endOfWord, offset)
if match then table.insert(matches, match) end
end
if vim.tbl_isempty(matches) then return nil end -- none found in this line
local nextPos = math.min(unpack(matches))

if not endOfWord then nextPos = nextPos + 1 end
if backwards then nextPos = #line - nextPos + 1 end
if backwards then nextPos = str_func.len(line) - nextPos + 1 end
return nextPos
end

Expand All @@ -118,44 +154,49 @@ function M.motion(key, motionOpts)
end

local row, col = unpack(vim.api.nvim_win_get_cursor(0))
local startCol, startRow = col, row
local lastRow = vim.api.nvim_buf_line_count(0)
local startRow = row
local lastRow = vim.fn.line("$")
local forwards = key == "w" or key == "e"

-- loop through counts
for i = 1, vim.v.count1, 1 do
if forwards then
col = col + 2 -- +1 (next position), +1 lua indexing
elseif not forwards and i > 1 then
col = col - 1 -- next pos
end
local line = getline(row)
local offset, startOffset = str_func.init_pos(line, col)

-- loop through rows (if next location not found in line)
-- looping through counts
for _ = 1, vim.v.count1, 1 do
-- looping through rows (if next location not found in line)
while true do
local line = getline(row)
col = getNextPosition(line, col, key, opts)
local onTheSamePos = (col == startCol + 1 and row == startRow)
if col and not onTheSamePos then break end
col = forwards and 1 or -1
local result = getNextPosition(line, offset, key, opts)
if result then
offset = result
local onTheSamePos = (offset == startOffset and row == startRow)
if not onTheSamePos then break end
end

offset = 0
row = forwards and row + 1 or row - 1
if row > lastRow or row < 1 then return end
line = getline(row)
end
end

col = col - 1 -- lua string indices different
col = str_func.offset(line, offset) - 1 -- lua string indices different

-- operator-pending specific considerations (see issues #3 and #5)
local mode = vim.api.nvim_get_mode().mode
local isOperatorPending = mode == "no" -- = [n]ormal & [o]perator, not the word "no"
if isOperatorPending then
local lastCol = vim.fn.col("$")
if key == "e" then col = col + 1 end
if key == "e" then
offset = offset + 1
col = str_func.offset(line, offset) - 1
end

if lastCol - 1 == col then
-- HACK columns are end-exclusive, cannot actually target the last character
-- in the line otherwise without switching to visual mode?!
vim.cmd.normal { "v", bang = true }
col = col - 1 -- SIC indices in visual off-by-one compared to normal
offset = offset - 1
col = str_func.offset(line, offset) - 1 -- SIC indices in visual off-by-one compared to normal
end
end

Expand Down

0 comments on commit b44e256

Please sign in to comment.