From 0b65deb88e250de1a4bf9e4f6053bd6d10bb9aeb Mon Sep 17 00:00:00 2001 From: Pavel Date: Thu, 13 Jul 2023 08:07:31 +0300 Subject: [PATCH 1/3] feat!: add utf8 support UTF-8 support is provided by a library from luarocks.org: luautf8. It contains several string methods and utf8-specific methods implemented according to the Lua specification. Thanks to this, the code did not become more complex; instead it was simplified by switching from `col` indexing to `offset` indexing, which also simplified the logic of the methods. The reason I refactored some code is the variable byte width of UTF-8: characters in the first range are encoded with one byte, characters in the second range with two bytes, and so on. Because of this, indexing by `col` is more complex than indexing by `offset`. I also changed the installation section of README.md, adding the luarocks dependency. packer.nvim has native rocks installation, but lazy.nvim needs the additional `nvim_rocks` dependency, which installs the luarocks dependency. --- README.md | 17 ++++++++- lua/spider/init.lua | 93 +++++++++++++++++++++++++-------------------- 2 files changed, 68 insertions(+), 42 deletions(-) diff --git a/README.md b/README.md index bf42c14..05852c8 100644 --- a/README.md +++ b/README.md @@ -95,7 +95,22 @@ call. }, -- packer -use { "chrisgrieser/nvim-spider" } +use { "chrisgrieser/nvim-spider", rocks = "luautf8" } + +-- lazy.nvim +{ + "chrisgrieser/nvim-spider", + lazy = true, + dependencies = { + "theHamsta/nvim_rocks", + event = "VeryLazy", + build = "pip3 install --user hererocks && python3 -mhererocks . -j2.1.0-beta3 -r3.0.0 && cp nvim_rocks.lua lua", + config = function() + local rocks = require("nvim_rocks") + rocks.ensure_installed("luautf8") + end, + } +}, ``` No keybindings are created by default. 
Below are the mappings to replace the diff --git a/lua/spider/init.lua b/lua/spider/init.lua index cd16143..b3b5d66 100644 --- a/lua/spider/init.lua +++ b/lua/spider/init.lua @@ -1,3 +1,5 @@ +local utf8 = require("lua-utf8") + local M = {} local patternVariants = require("spider.pattern-variants") @@ -35,18 +37,20 @@ end ---@param line string ---@param pattern string ---@param endOfWord boolean look for the end of the pattern instead of the start ----@param col number look for the first match after this number +---@param offset number -- look for the first match after this number ---@nodiscard ----@return number|nil -- returns nil if none is found -local function firstMatchAfter(line, pattern, endOfWord, col) +---@return number|nil returns nil if none is found +local function firstMatchAfter(line, pattern, endOfWord, offset) -- special case: pattern with ^/$, since there can only be one match -- and since gmatch won't work with them if pattern:find("^%^") or pattern:find("%$$") then - if pattern:find("%$$") and col >= #line then return nil end -- checking for high col count for virtualedit - if pattern:find("^%^") and col ~= 1 then return nil end - local start, endPos = line:find(pattern) + if pattern:find("%$$") and offset > utf8.len(line) then return nil end -- checking for high col count for virtualedit + if pattern:find("^%^") and offset ~= 0 then return nil end + + local start, endPos = utf8.find(line, pattern) + if start == nil or endPos == nil then return nil end + local pos = endOfWord and endPos or start - if pos and not endOfWord then pos = pos - 1 end return pos end @@ -56,47 +60,44 @@ local function firstMatchAfter(line, pattern, endOfWord, col) pattern = "()" .. 
pattern end -- `:gmatch` will return all locations in the string where the pattern is - -- found, the loop looks for the first one that is higher than the col to - -- look from - for pos in line:gmatch(pattern) do - if endOfWord and pos > col then return pos - 1 end - if not endOfWord and pos >= col then return pos - 1 end + -- found, the loop looks for the first one that is higher than the offset + -- to look from + for pos in utf8.gmatch(line, pattern) do + if endOfWord then pos = pos - 1 end + if pos > offset then return pos end end return nil end ---@param line string input string where to find the pattern ----@param col number position to start looking from +---@param offset number position to start looking from ---@param key "w"|"e"|"b"|"ge" the motion to perform ---@param opts optsObj configuration table as in setup() ---@nodiscard ---@return number|nil pattern position, returns nil if no pattern was found -local function getNextPosition(line, col, key, opts) +local function getNextPosition(line, offset, key, opts) local endOfWord = (key == "ge") or (key == "e") local backwards = (key == "b") or (key == "ge") local patterns = patternVariants.get(opts, backwards) if backwards then - line = line:reverse() + line = utf8.reverse(line) endOfWord = not endOfWord - if col == -1 then - col = 1 - else - col = #line - col + 1 - end + + local isSameLine = offset ~= 0 + if isSameLine then offset = utf8.len(line) - offset + 1 end end -- search for patterns, get closest one local matches = {} for _, pattern in pairs(patterns) do - local match = firstMatchAfter(line, pattern, endOfWord, col) + local match = firstMatchAfter(line, pattern, endOfWord, offset) if match then table.insert(matches, match) end end if vim.tbl_isempty(matches) then return nil end -- none found in this line local nextPos = math.min(unpack(matches)) - if not endOfWord then nextPos = nextPos + 1 end - if backwards then nextPos = #line - nextPos + 1 end + if backwards then nextPos = utf8.len(line) - 
nextPos + 1 end return nextPos end @@ -118,44 +119,54 @@ function M.motion(key, motionOpts) end local row, col = unpack(vim.api.nvim_win_get_cursor(0)) - local startCol, startRow = col, row - local lastRow = vim.api.nvim_buf_line_count(0) + local startRow = row + local lastRow = vim.fn.line("$") local forwards = key == "w" or key == "e" - -- loop through counts - for i = 1, vim.v.count1, 1 do - if forwards then - col = col + 2 -- +1 (next position), +1 lua indexing - elseif not forwards and i > 1 then - col = col - 1 -- next pos - end + local line = getline(row) + local offset = 1 + for p, _ in utf8.codes(getline(row)) do + if p > col then break end + offset = offset + 1 + end + local startOffset = offset - -- loop through rows (if next location not found in line) + -- looping through counts + for _ = 1, vim.v.count1, 1 do + -- looping through rows (if next location not found in line) while true do - local line = getline(row) - col = getNextPosition(line, col, key, opts) - local onTheSamePos = (col == startCol + 1 and row == startRow) - if col and not onTheSamePos then break end - col = forwards and 1 or -1 + local result = getNextPosition(line, offset, key, opts) + if result then + offset = result + local onTheSamePos = (offset == startOffset and row == startRow) + if not onTheSamePos then break end + end + + offset = 0 row = forwards and row + 1 or row - 1 if row > lastRow or row < 1 then return end + line = getline(row) end end - col = col - 1 -- lua string indices different + col = utf8.offset(line, offset) - 1 -- lua string indices different -- operator-pending specific considerations (see issues #3 and #5) local mode = vim.api.nvim_get_mode().mode local isOperatorPending = mode == "no" -- = [n]ormal & [o]perator, not the word "no" if isOperatorPending then local lastCol = vim.fn.col("$") - if key == "e" then col = col + 1 end + if key == "e" then + offset = offset + 1 + col = utf8.offset(line, offset) - 1 + end if lastCol - 1 == col then -- HACK columns are 
end-exclusive, cannot actually target the last character -- in the line otherwise without switching to visual mode?! vim.cmd.normal { "v", bang = true } - col = col - 1 -- SIC indices in visual off-by-one compared to normal + offset = offset - 1 + col = utf8.offset(line, offset) - 1 -- SIC indices in visual off-by-one compared to normal end end From d1472593e872e1c8e4846d7bcddef5f749ff2b7f Mon Sep 17 00:00:00 2001 From: Pavel Date: Mon, 17 Jul 2023 12:10:03 +0300 Subject: [PATCH 2/3] feat: adding automatic detection of UTF-8 lib This solves the following problem: making the `luautf8` Lua library optional. Users who don't need UTF-8 support, because they only work with latin1 symbols, can continue to use this plugin without any problem. And they can add UTF-8 support if needed in an easy way: just add the dependency in the nvim plugin manager. The solution: add a table with universal string functions like "reverse", "find", "gmatch" and others, and use `pcall` to detect whether the Lua library `luautf8` is available. The string functions in the table are overridden if the library exists. So there is no dependency confusion. I've also changed the README.md by restoring the "Installation" section and adding the "UTF-8 support" section. --- README.md | 26 +++++++++++++++++- lua/spider/init.lua | 64 +++++++++++++++++++++++++++++++++------------ 2 files changed, 72 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 05852c8..0ef41ca 100644 --- a/README.md +++ b/README.md @@ -95,7 +95,31 @@ call. }, -- packer -use { "chrisgrieser/nvim-spider", rocks = "luautf8" } +use { "chrisgrieser/nvim-spider" } + +-- lazy.nvim +{ "chrisgrieser/nvim-spider", lazy = true }, +``` + +No keybindings are created by default. Below are the mappings to replace the default `w`, `e`, and `b` motions with this plugin's version of them. 
+ +```lua +vim.keymap.set({"n", "o", "x"}, "w", "lua require('spider').motion('w')", { desc = "Spider-w" }) +vim.keymap.set({"n", "o", "x"}, "e", "lua require('spider').motion('e')", { desc = "Spider-e" }) +vim.keymap.set({"n", "o", "x"}, "b", "lua require('spider').motion('b')", { desc = "Spider-b" }) +vim.keymap.set({"n", "o", "x"}, "ge", "lua require('spider').motion('ge')", { desc = "Spider-ge" }) +``` + +> __Note__ +> For dot-repeat to work, you have to call the motions as Ex-commands. When calling `function() require("spider").motion("w") end` as third argument of the keymap, dot-repeatability will *not* work. + +## UTF-8 support + +For adding UTF-8 support for matching non-ASCII text, add rocks `luautf8` in packer.nvim. Or add [dependency](https://github.com/theHamsta/nvim_rocks) like below, in lazy.nvim example. + +```lua +-- packer +{ "chrisgrieser/nvim-spider", rocks = "luautf8" } -- lazy.nvim { diff --git a/lua/spider/init.lua b/lua/spider/init.lua index b3b5d66..f4a9fcb 100644 --- a/lua/spider/init.lua +++ b/lua/spider/init.lua @@ -1,8 +1,41 @@ -local utf8 = require("lua-utf8") - local M = {} local patternVariants = require("spider.pattern-variants") +local str_func = { + reverse = string.reverse, + find = string.find, + gmatch = string.gmatch, + len = string.len, + init_pos = function(_, col) + col = col + 1 -- from 0-based indexing to 1-based + local startCol = col + return col, startCol + end, + offset = function(_, pos) + return pos + end, +} + +local ok, utf8 = pcall(require, "lua-utf8") + +if ok then + -- remapping functions to utf8 supported functions + for name, _ in pairs(str_func) do + if utf8[name] then + str_func[name] = utf8[name] + end + end + str_func.init_pos = function(s, col) + local offset = 1 + for p, _ in utf8.codes(s) do + if p > col then break end + offset = offset + 1 + end + local startOffset = offset + return offset, startOffset + end +end + -------------------------------------------------------------------------------- -- 
CONFIG ---@class (exact) optsObj @@ -44,10 +77,10 @@ local function firstMatchAfter(line, pattern, endOfWord, offset) -- special case: pattern with ^/$, since there can only be one match -- and since gmatch won't work with them if pattern:find("^%^") or pattern:find("%$$") then - if pattern:find("%$$") and offset > utf8.len(line) then return nil end -- checking for high col count for virtualedit + if pattern:find("%$$") and offset > str_func.len(line) then return nil end -- checking for high col count for virtualedit if pattern:find("^%^") and offset ~= 0 then return nil end - local start, endPos = utf8.find(line, pattern) + local start, endPos = str_func.find(line, pattern) if start == nil or endPos == nil then return nil end local pos = endOfWord and endPos or start @@ -62,7 +95,9 @@ local function firstMatchAfter(line, pattern, endOfWord, offset) -- `:gmatch` will return all locations in the string where the pattern is -- found, the loop looks for the first one that is higher than the offset -- to look from - for pos in utf8.gmatch(line, pattern) do + for pos in str_func.gmatch(line, pattern) do + if type(pos) == "string" then return nil end + if endOfWord then pos = pos - 1 end if pos > offset then return pos end end @@ -81,11 +116,11 @@ local function getNextPosition(line, offset, key, opts) local patterns = patternVariants.get(opts, backwards) if backwards then - line = utf8.reverse(line) + line = str_func.reverse(line) endOfWord = not endOfWord local isSameLine = offset ~= 0 - if isSameLine then offset = utf8.len(line) - offset + 1 end + if isSameLine then offset = str_func.len(line) - offset + 1 end end -- search for patterns, get closest one @@ -97,7 +132,7 @@ local function getNextPosition(line, offset, key, opts) if vim.tbl_isempty(matches) then return nil end -- none found in this line local nextPos = math.min(unpack(matches)) - if backwards then nextPos = utf8.len(line) - nextPos + 1 end + if backwards then nextPos = str_func.len(line) - nextPos + 1 
end return nextPos end @@ -124,12 +159,7 @@ function M.motion(key, motionOpts) local forwards = key == "w" or key == "e" local line = getline(row) - local offset = 1 - for p, _ in utf8.codes(getline(row)) do - if p > col then break end - offset = offset + 1 - end - local startOffset = offset + local offset, startOffset = str_func.init_pos(line, col) -- looping through counts for _ = 1, vim.v.count1, 1 do @@ -149,7 +179,7 @@ function M.motion(key, motionOpts) end end - col = utf8.offset(line, offset) - 1 -- lua string indices different + col = str_func.offset(line, offset) - 1 -- lua string indices different -- operator-pending specific considerations (see issues #3 and #5) local mode = vim.api.nvim_get_mode().mode @@ -158,7 +188,7 @@ function M.motion(key, motionOpts) local lastCol = vim.fn.col("$") if key == "e" then offset = offset + 1 - col = utf8.offset(line, offset) - 1 + col = str_func.offset(line, offset) - 1 end if lastCol - 1 == col then @@ -166,7 +196,7 @@ function M.motion(key, motionOpts) -- in the line otherwise without switching to visual mode?! vim.cmd.normal { "v", bang = true } offset = offset - 1 - col = utf8.offset(line, offset) - 1 -- SIC indices in visual off-by-one compared to normal + col = str_func.offset(line, offset) - 1 -- SIC indices in visual off-by-one compared to normal end end From dd9f7757b7f54ba7cff3e4e6bf063fb080aa35ef Mon Sep 17 00:00:00 2001 From: Pavel Date: Thu, 21 Dec 2023 10:18:59 +0300 Subject: [PATCH 3/3] chore(README.md): removed duplication of text --- README.md | 60 ++++++++++++++++++++++--------------------------------- 1 file changed, 24 insertions(+), 36 deletions(-) diff --git a/README.md b/README.md index 0ef41ca..16bb64c 100644 --- a/README.md +++ b/README.md @@ -101,42 +101,6 @@ use { "chrisgrieser/nvim-spider" } { "chrisgrieser/nvim-spider", lazy = true }, ``` -No keybindings are created by default. Below are the mappings to replace the default `w`, `e`, and `b` motions with this plugin's version of them. 
- -```lua -vim.keymap.set({"n", "o", "x"}, "w", "lua require('spider').motion('w')", { desc = "Spider-w" }) -vim.keymap.set({"n", "o", "x"}, "e", "lua require('spider').motion('e')", { desc = "Spider-e" }) -vim.keymap.set({"n", "o", "x"}, "b", "lua require('spider').motion('b')", { desc = "Spider-b" }) -vim.keymap.set({"n", "o", "x"}, "ge", "lua require('spider').motion('ge')", { desc = "Spider-ge" }) -``` - -> __Note__ -> For dot-repeat to work, you have to call the motions as Ex-commands. When calling `function() require("spider").motion("w") end` as third argument of the keymap, dot-repeatability will *not* work. - -## UTF-8 support - -For adding UTF-8 support for matching non-ASCII text, add rocks `luautf8` in packer.nvim. Or add [dependency](https://github.com/theHamsta/nvim_rocks) like below, in lazy.nvim example. - -```lua --- packer -{ "chrisgrieser/nvim-spider", rocks = "luautf8" } - --- lazy.nvim -{ - "chrisgrieser/nvim-spider", - lazy = true, - dependencies = { - "theHamsta/nvim_rocks", - event = "VeryLazy", - build = "pip3 install --user hererocks && python3 -mhererocks . -j2.1.0-beta3 -r3.0.0 && cp nvim_rocks.lua lua", - config = function() - local rocks = require("nvim_rocks") - rocks.ensure_installed("luautf8") - end, - } -}, -``` - No keybindings are created by default. Below are the mappings to replace the default `w`, `e`, and `b` motions with this plugin's version of them. @@ -166,6 +130,30 @@ vim.keymap.set( > using `function() require("spider").motion("w") end` as third argument of > the keymap, dot-repeatability will not work. +## UTF-8 support + +For adding UTF-8 support for matching non-ASCII text, add rocks `luautf8` in packer.nvim. Or add [dependency](https://github.com/theHamsta/nvim_rocks) like below, in lazy.nvim example. 
+ +```lua +-- packer +{ "chrisgrieser/nvim-spider", rocks = "luautf8" } + +-- lazy.nvim +{ + "chrisgrieser/nvim-spider", + lazy = true, + dependencies = { + "theHamsta/nvim_rocks", + event = "VeryLazy", + build = "pip3 install --user hererocks && python3 -mhererocks . -j2.1.0-beta3 -r3.0.0 && cp nvim_rocks.lua lua", + config = function() + local rocks = require("nvim_rocks") + rocks.ensure_installed("luautf8") + end, + } +}, +``` + ## Configuration The `.setup()` call is optional.