Skip to content

Commit

Permalink
Completed the mini Clojure reader as the core of the indentation system
Browse files Browse the repository at this point in the history
Some refactoring and further optimisations should be possible here.
Once all optimisations I can think of have been implemented, I'll try
writing an alternate Vim9 script version.

(The syntax highlight group checks used in previous implementations of
the indentation code were the core bottleneck, so a Vim9 script version
would not have been much faster.)
  • Loading branch information
axvr committed May 1, 2023
1 parent cc9cda7 commit ee2acc2
Showing 1 changed file with 76 additions and 52 deletions.
128 changes: 76 additions & 52 deletions indent/clojure.vim
Original file line number Diff line number Diff line change
Expand Up @@ -21,32 +21,36 @@ setlocal noautoindent nosmartindent nolisp
setlocal softtabstop=2 shiftwidth=2 expandtab
setlocal indentkeys=!,o,O

" Returns true if char_idx is preceded by an odd number of backslashes.
function! s:IsEscaped(line_str, char_idx)
let ln = a:line_str[: a:char_idx - 1]
" TODO: After all optimisations create Vim9script variant of the core algorithm.

" Returns "1" if position "i_char" in "line_str" is preceded by an odd number
" of backslash characters (i.e. escaped).
function! s:IsEscaped(line_str, i_char)
let ln = a:line_str[: a:i_char - 1]
return (strlen(ln) - strlen(trim(ln, '\', 2))) % 2
endfunction

let s:pairs = {'(': ')', '[': ']', '{': '}'}

" TODO: Maybe write a Vim9script version of this?
" Repeatedly search for tokens on the given line in reverse order building up
" a list of tokens and their positions. Ignores escaped tokens.
function! s:AnalyseLine(line_num)
" Repeatedly search for tokens on a given line (in reverse order) building up
" a list of tokens and their positions. Ignores escaped tokens. Does not
" care about strings, as that is handled by "s:InsideForm".
function! s:TokeniseLine(line_num)
let tokens = []
let ln = getline(a:line_num)
while 1
" Due to legacy Vimscript being painfully slow, we literally
" have to move the cursor and perform searches which is
" ironically faster than for looping by character.
" We perform searches within the buffer (and move the cursor)
" for better performance than looping char by char in a line.
let token = searchpos('[()\[\]{};"]', 'bW', a:line_num)

" No more matches, exit loop.
if token == [0, 0] | break | endif

let t_idx = token[1] - 1

" Escaped character, ignore.
if s:IsEscaped(ln, t_idx) | continue | endif
let t_char = ln[t_idx]
let t_char = ln[t_idx]
if t_char ==# ';'
" Comment found, reset the token list for this line.
tokens = []
Expand All @@ -59,61 +63,79 @@ function! s:AnalyseLine(line_num)
return tokens
endfunction

" This should also be capable of figuring out if we're in a multi-line string
" or regex.
function! s:InverseRead(lnum)
let lnum = a:lnum - 1
let s:pairs = {'(': ')', '[': ']', '{': '}'}

" TODO: refactor this procedure and optimise.
" This procedure is essentially a lightweight Clojure reader.
function! s:InsideForm(lnum)
" Reset cursor to first column of the line we wish to indent.
call cursor(a:lnum, 1)

" Token list looks like this: "[[delim, [line, col]], ...]".
let tokens = []
let first_string_pos = []
let in_string = 0

let lnum = a:lnum - 1
while lnum > 0
call cursor(lnum + 1, 1)
let line_tokens = s:AnalyseLine(lnum)
" Reduce tokens from line "lnum" into "tokens".
for tk in s:TokeniseLine(lnum)
" Keep track of the first string delimiter we see, as
" we'll need it later for multi-line strings/regexps.
if first_string_pos == [] && tk[0] ==# '"'
let first_string_pos = tk[1]
endif

" let should_ignore = empty(a:tokens) ? 0 : (a:tokens[-1][0] ==# '"')
" When in string ignore other tokens.
if in_string && tk[0] !=# '"'
continue
else
let in_string = 0
endif

" Reduce "tokens" and "line_tokens".
for t in line_tokens
" TODO: attempt early termination.
" TODO: early termination?
if empty(tokens)
call add(tokens, t)
elseif t[0] ==# '"' && tokens[-1][0] ==# '"'
" TODO: track original start and ignore values
" inside strings.
call add(tokens, tk)
elseif tk[0] ==# '"' && tokens[-1][0] ==# '"'
call remove(tokens, -1)
elseif get(s:pairs, t[0], '') ==# tokens[-1][0]
elseif get(s:pairs, tk[0], '') ==# tokens[-1][0]
" Matching pair: drop the last item in tokens.
call remove(tokens, -1)
else
" No match: append to token list.
call add(tokens, t)
call add(tokens, tk)
endif
endfor

" echom 'Pass' lnum tokens

if ! empty(tokens) && has_key(s:pairs, tokens[0][0])
" TODO: on string match, check if string or regex.
" echom 'Match!' tokens[0]
return tokens[0]
endif

let lnum -= 1
endwhile

if ! empty(tokens) && tokens[0][0] ==# '"'
" Must have been in a multi-line string or regular expression
" as the string was never closed.
return ['"', first_string_pos]
endif

return ['^', [0, 0]] " Default to top-level.
endfunction

" Look up the configuration option named "opt".  A buffer-local value (b:)
" takes precedence over a global one (g:); when neither variable exists,
" "default" is returned instead.
function! s:Conf(opt, default)
  let global_value = get(g:, a:opt, a:default)
  return get(b:, a:opt, global_value)
endfunction

" Returns "1" when the previous operator used was "=" and is currently active.
function! s:EqualsOperatorInEffect()
  " Yields 1 only when the most recently used operator is "=" AND Vim is
  " currently operator-pending (i.e. "state('o')" reports "o"); otherwise 0.
  let last_op_is_equals = v:operator ==# '='
  let operator_pending = state('o') ==# 'o'
  return last_op_is_equals && operator_pending
endfunction

function! s:GetStringIndent(delim_pos, is_regex)
function! s:StringIndent(delim_pos)
" Mimic multi-line string indentation behaviour in VS Code and Emacs.
let m = mode()
if m ==# 'i' || (m ==# 'n' && ! s:EqualsOperatorInEffect())
Expand All @@ -124,43 +146,45 @@ function! s:GetStringIndent(delim_pos, is_regex)
" 1: Indent in alignment with string start delimiter.
if alignment == -1 | return 0
elseif alignment == 1 | return a:delim_pos[1]
else | return a:delim_pos[1] - (a:is_regex ? 2 : 1)
else
let col = a:delim_pos[1]
let is_regex = col > 1 && getline(a:delim_pos[0])[col - 2] ==# '#'
return col - (is_regex ? 2 : 1)
endif
else
return -1 " Keep existing indent.
endif
endfunction

function! s:GetListIndent(delim_pos)
" TODO Begin analysis and apply rules!
function! s:ListIndent(delim_pos)
" let lns = getline(delim_pos[0], v:lnum - 1)
let ln1 = getline(delim_pos[0])
let sym = get(split(ln1[delim_pos[1]:], '[[:space:],;()\[\]{}@\\"^~`]', 1), 0, -1)
let ln1 = getline(a:delim_pos[0])
let delim_col = a:delim_pos[1]
let sym = get(split(ln1[delim_col:], '[[:space:],;()\[\]{}@\\"^~`]', 1), 0, -1)
if sym != -1 && ! empty(sym) && match(sym, '^[0-9:]') == -1
" TODO: align indentation.
" TODO: lookup rules.
return delim_pos[1] + 1 " 2 space indentation
return delim_col + 1 " 2 space indentation
endif

" TODO: switch between 1 vs 2 space indentation.
return delim_pos[1] " 1 space indentation
return delim_col " 1 space indentation
endfunction

function! s:GetClojureIndent()
function! s:ClojureIndent()
" Calculate and return indent to use based on the matching form.
let [formtype, coord] = s:InverseRead(v:lnum)
if formtype ==# '^' | return 0 " At top-level, no indent.
elseif formtype ==# '(' | return s:GetListIndent(coord)
elseif formtype ==# '[' | return coord[1] " Vector
elseif formtype ==# '{' | return coord[1] " Map/set
elseif formtype ==# '"' | return s:GetStringIndent(coord, 0)
elseif formtype ==# '#"' | return s:GetStringIndent(coord, 1)
else | return -1 " Keep existing indent.
let [form, pos] = s:InsideForm(v:lnum)
if form ==# '^' | return 0 " At top-level, no indent.
elseif form ==# '(' | return s:ListIndent(pos)
elseif form ==# '[' | return pos[1]
elseif form ==# '{' | return pos[1]
elseif form ==# '"' | return s:StringIndent(pos)
else | return -1 " Keep existing indent.
endif
endfunction

" TODO: lispoptions if exists.
setlocal indentexpr=s:GetClojureIndent()
" TODO: set lispoptions if exists.
setlocal indentexpr=s:ClojureIndent()

let &cpoptions = s:save_cpo
unlet! s:save_cpo
Expand Down

0 comments on commit ee2acc2

Please sign in to comment.