Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add utf8 support #24

Merged
merged 3 commits into from
Dec 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,9 @@ call.

-- packer
use { "chrisgrieser/nvim-spider" }

-- lazy.nvim
{ "chrisgrieser/nvim-spider", lazy = true },
```

No keybindings are created by default. Below are the mappings to replace the
Expand Down Expand Up @@ -127,6 +130,30 @@ vim.keymap.set(
> using `function() require("spider").motion("w") end` as third argument of
> the keymap, dot-repeatability will not work.

## UTF-8 support

To add UTF-8 support for matching non-ASCII text, install the `luautf8` rock via the `rocks` option in packer.nvim. With lazy.nvim, add the [nvim_rocks dependency](https://github.com/theHamsta/nvim_rocks) instead, as shown in the example below.

```lua
-- packer
{ "chrisgrieser/nvim-spider", rocks = "luautf8" }

-- lazy.nvim
{
"chrisgrieser/nvim-spider",
lazy = true,
dependencies = {
"theHamsta/nvim_rocks",
event = "VeryLazy",
build = "pip3 install --user hererocks && python3 -mhererocks . -j2.1.0-beta3 -r3.0.0 && cp nvim_rocks.lua lua",
config = function()
local rocks = require("nvim_rocks")
rocks.ensure_installed("luautf8")
end,
}
},
```

## Configuration
The `.setup()` call is optional.

Expand Down
123 changes: 82 additions & 41 deletions lua/spider/init.lua
Original file line number Diff line number Diff line change
@@ -1,6 +1,41 @@
local M = {}
local patternVariants = require("spider.pattern-variants")

-- Table of the string helpers used by this module. These defaults are taken
-- from the standard `string` library, plus two small position helpers.
local str_func = {}

str_func.reverse = string.reverse
str_func.find = string.find
str_func.gmatch = string.gmatch
str_func.len = string.len

---Turns a 0-based column into a 1-based position.
---@param _ string unused in this default implementation
---@param col number 0-based column
---@return number position the 1-based position
---@return number startPosition the same value, returned a second time
function str_func.init_pos(_, col)
	local oneBased = col + 1 -- from 0-based indexing to 1-based
	return oneBased, oneBased
end

---Default implementation: hands back `pos` unchanged.
---@param _ string unused in this default implementation
---@param pos number
---@return number
function str_func.offset(_, pos) return pos end

-- `lua-utf8` is optional: `pcall` keeps a failing `require` from raising, and
-- `ok` records whether the module could be loaded.
local ok, utf8 = pcall(require, "lua-utf8")

if ok then
	-- Every entry of `str_func` for which the loaded module has a field of the
	-- same name is replaced by the module's version.
	for name in pairs(str_func) do
		local replacement = utf8[name]
		if replacement then str_func[name] = replacement end
	end

	---Starts at 1 and adds one for every position yielded by `utf8.codes(s)`
	---that is not greater than `col`.
	---NOTE(review): presumably `utf8.codes` yields byte positions, making the
	---result the 1-based character index for a 0-based byte column -- confirm
	---against the lua-utf8 documentation.
	---@param s string
	---@param col number
	---@return number offset
	---@return number startOffset the same value, returned a second time
	str_func.init_pos = function(s, col)
		local charIndex = 1
		for codePos in utf8.codes(s) do
			if codePos > col then return charIndex, charIndex end
			charIndex = charIndex + 1
		end
		return charIndex, charIndex
	end
end

--------------------------------------------------------------------------------
-- CONFIG
---@class (exact) optsObj
Expand Down Expand Up @@ -35,18 +70,20 @@ end
---@param line string
---@param pattern string
---@param endOfWord boolean look for the end of the pattern instead of the start
---@param col number look for the first match after this number
---@param offset number -- look for the first match after this number
---@nodiscard
---@return number|nil -- returns nil if none is found
local function firstMatchAfter(line, pattern, endOfWord, col)
---@return number|nil returns nil if none is found
local function firstMatchAfter(line, pattern, endOfWord, offset)
-- special case: pattern with ^/$, since there can only be one match
-- and since gmatch won't work with them
if pattern:find("^%^") or pattern:find("%$$") then
if pattern:find("%$$") and col >= #line then return nil end -- checking for high col count for virtualedit
if pattern:find("^%^") and col ~= 1 then return nil end
local start, endPos = line:find(pattern)
if pattern:find("%$$") and offset > str_func.len(line) then return nil end -- checking for high col count for virtualedit
if pattern:find("^%^") and offset ~= 0 then return nil end

local start, endPos = str_func.find(line, pattern)
if start == nil or endPos == nil then return nil end

local pos = endOfWord and endPos or start
if pos and not endOfWord then pos = pos - 1 end
return pos
end

Expand All @@ -56,47 +93,46 @@ local function firstMatchAfter(line, pattern, endOfWord, col)
pattern = "()" .. pattern
end
-- `:gmatch` will return all locations in the string where the pattern is
-- found, the loop looks for the first one that is higher than the col to
-- look from
for pos in line:gmatch(pattern) do
if endOfWord and pos > col then return pos - 1 end
if not endOfWord and pos >= col then return pos - 1 end
-- found, the loop looks for the first one that is higher than the offset
-- to look from
for pos in str_func.gmatch(line, pattern) do
if type(pos) == "string" then return nil end

if endOfWord then pos = pos - 1 end
if pos > offset then return pos end
end
return nil
end

---@param line string input string where to find the pattern
---@param col number position to start looking from
---@param offset number position to start looking from
---@param key "w"|"e"|"b"|"ge" the motion to perform
---@param opts optsObj configuration table as in setup()
---@nodiscard
---@return number|nil pattern position, returns nil if no pattern was found
local function getNextPosition(line, col, key, opts)
local function getNextPosition(line, offset, key, opts)
local endOfWord = (key == "ge") or (key == "e")
local backwards = (key == "b") or (key == "ge")
local patterns = patternVariants.get(opts, backwards)

if backwards then
line = line:reverse()
line = str_func.reverse(line)
endOfWord = not endOfWord
if col == -1 then
col = 1
else
col = #line - col + 1
end

local isSameLine = offset ~= 0
if isSameLine then offset = str_func.len(line) - offset + 1 end
end

-- search for patterns, get closest one
local matches = {}
for _, pattern in pairs(patterns) do
local match = firstMatchAfter(line, pattern, endOfWord, col)
local match = firstMatchAfter(line, pattern, endOfWord, offset)
if match then table.insert(matches, match) end
end
if vim.tbl_isempty(matches) then return nil end -- none found in this line
local nextPos = math.min(unpack(matches))

if not endOfWord then nextPos = nextPos + 1 end
if backwards then nextPos = #line - nextPos + 1 end
if backwards then nextPos = str_func.len(line) - nextPos + 1 end
return nextPos
end

Expand All @@ -118,44 +154,49 @@ function M.motion(key, motionOpts)
end

local row, col = unpack(vim.api.nvim_win_get_cursor(0))
local startCol, startRow = col, row
local lastRow = vim.api.nvim_buf_line_count(0)
local startRow = row
local lastRow = vim.fn.line("$")
local forwards = key == "w" or key == "e"

-- loop through counts
for i = 1, vim.v.count1, 1 do
if forwards then
col = col + 2 -- +1 (next position), +1 lua indexing
elseif not forwards and i > 1 then
col = col - 1 -- next pos
end
local line = getline(row)
local offset, startOffset = str_func.init_pos(line, col)

-- loop through rows (if next location not found in line)
-- looping through counts
for _ = 1, vim.v.count1, 1 do
-- looping through rows (if next location not found in line)
while true do
local line = getline(row)
col = getNextPosition(line, col, key, opts)
local onTheSamePos = (col == startCol + 1 and row == startRow)
if col and not onTheSamePos then break end
col = forwards and 1 or -1
local result = getNextPosition(line, offset, key, opts)
if result then
offset = result
local onTheSamePos = (offset == startOffset and row == startRow)
if not onTheSamePos then break end
end

offset = 0
row = forwards and row + 1 or row - 1
if row > lastRow or row < 1 then return end
line = getline(row)
end
end

col = col - 1 -- lua string indices different
col = str_func.offset(line, offset) - 1 -- lua string indices different

-- operator-pending specific considerations (see issues #3 and #5)
local mode = vim.api.nvim_get_mode().mode
local isOperatorPending = mode == "no" -- = [n]ormal & [o]perator, not the word "no"
if isOperatorPending then
local lastCol = vim.fn.col("$")
if key == "e" then col = col + 1 end
if key == "e" then
offset = offset + 1
col = str_func.offset(line, offset) - 1
end

if lastCol - 1 == col then
-- HACK columns are end-exclusive, cannot actually target the last character
-- in the line otherwise without switching to visual mode?!
vim.cmd.normal { "v", bang = true }
col = col - 1 -- SIC indices in visual off-by-one compared to normal
offset = offset - 1
col = str_func.offset(line, offset) - 1 -- SIC indices in visual off-by-one compared to normal
end
end

Expand Down