Skip to content

Commit

Permalink
the comment
Browse files Browse the repository at this point in the history
Improve comment handling by making sure that comment tokens are skipped
consistently, postfixed at the "outer" level as much as possible and
generally don't go missing if they get attached to the wrong node.

* keep better track of end-of-token information for better empty line
preservation
* fix several cases of swallowed mid comments
* fix several cases of unnecessarily re-indented comments
* more regular line break after proc-with-body and friends
  • Loading branch information
arnetheduck committed Dec 14, 2023
1 parent 703133b commit ab8f57b
Show file tree
Hide file tree
Showing 24 changed files with 1,009 additions and 684 deletions.
5 changes: 4 additions & 1 deletion nph.nimble
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,14 @@ task self, "Format nph itself":
echo file
exec "./nph " & file

import std/algorithm
task f, "Format":
build()

cd "tests/before"
for file in listFiles("."):
# Sort tests so that 00_empty always is first, which makes it a convenient
# experimentation ground :)
for file in sorted(listFiles(".")):
if file.len > 4 and file[^4..^1] == ".nim":
echo file
exec "../../nph " & file & " --outDir:../after --debug"
1 change: 0 additions & 1 deletion src/astcmp.nim
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ proc equivalent*(a, b: PNode): Outcome =
# TODO the positive and negative ranges of integers are not the same - is this a problem?
# what about more complex expressions?
return Outcome(kind: Same)

# runnableExamples: static: ... turns into a staticStmt when broken up in
# lines (!)
if a.kind == nkCall and b.kind == nkStaticStmt and a.sons.len > 1 and
Expand Down
21 changes: 12 additions & 9 deletions src/phast.nim
Original file line number Diff line number Diff line change
Expand Up @@ -1040,8 +1040,7 @@ type
ident*: PIdent
else:
sons*: TNodeSeq
when defined(nimsuggest):
endInfo*: TLineInfo
endInfo*: TLineInfo
prefix*: seq[Token] # comments leading up to this node
mid*: seq[Token] # comments in the middle of the node
postfix*: seq[Token] # comments after the node
Expand Down Expand Up @@ -1156,6 +1155,7 @@ type
# for modules, it's a placeholder for compiler
# generated code that will be appended to the
# module after the sem pass (see appendToModule)

options*: TOptions
position*: int
# used for many different things:
Expand All @@ -1168,14 +1168,17 @@ type
# for modules, an unique index corresponding
# to the module's fileIdx
# for variables a slot index for the evaluator

offset*: int32 # offset of record field
disamb*: int32
# disambiguation number; the basic idea is that
# `<procname>__<module>_<disamb>`

loc*: TLoc
annex*: PLib
# additional fields (seldom used, so we use a
# reference to another object to save space)

when hasFFI:
cname*: string
# resolved C declaration name in importc decl, e.g.:
Expand All @@ -1186,6 +1189,7 @@ type
# it won't cause problems
# for skModule the string literal to output for
# deprecated modules.

when defined(nimsuggest):
allUsages*: seq[TLineInfo]

Expand Down Expand Up @@ -1219,19 +1223,23 @@ type
# formal param list
# for concepts, the concept body
# else: unused

owner*: PSym # the 'owner' of the type
sym*: PSym
# types have the sym associated with them
# it is used for converting types to strings

size*: BiggestInt
# the size of the type in bytes
# -1 means that the size is unkwown

align*: int16 # the type's alignment requirements
paddingAtEnd*: int16 #
loc*: TLoc
typeInst*: PType
# for generic instantiations the tyGenericInst that led to this
# type.

uniqueId*: ItemId
# due to a design mistake, we need to keep the real ID here as it
# is required by the --incremental:on mode.
Expand Down Expand Up @@ -1560,6 +1568,7 @@ proc getDeclPragma*(n: PNode): PNode =
Empty
Ident "int"
]#

if n[0].kind == nkPragmaExpr:
result = n[0][1]
else:
Expand Down Expand Up @@ -1945,7 +1954,6 @@ proc assignType*(dest, src: PType) =
dest.n = src.n
dest.size = src.size
dest.align = src.align

# this fixes 'type TLock = TSysLock':
if src.sym != nil:
if dest.sym != nil:
Expand Down Expand Up @@ -1973,7 +1981,6 @@ proc exactReplica*(t: PType): PType =

proc copySym*(s: PSym; idgen: IdGenerator): PSym =
result = newSym(s.kind, s.name, idgen, s.owner, s.info, s.options)

#result.ast = nil # BUGFIX; was: s.ast which made problems
result.typ = s.typ
result.flags = s.flags
Expand All @@ -1992,10 +1999,8 @@ proc createModuleAlias*(
s: PSym; idgen: IdGenerator; newIdent: PIdent; info: TLineInfo; options: TOptions
): PSym =
result = newSym(s.kind, newIdent, idgen, s.owner, info, options)

# keep ID!
result.ast = s.ast

#result.id = s.id # XXX figure out what to do with the ID.
result.flags = s.flags
result.options = s.options
Expand All @@ -2021,7 +2026,6 @@ proc newIdTable*(): TIdTable =

proc resetIdTable*(x: var TIdTable) =
x.counter = 0

# clear and set to old initial size:
setLen(x.data, 0)
setLen(x.data, StartSize)
Expand Down Expand Up @@ -2153,7 +2157,6 @@ template transitionNodeKindCommon(k: TNodeKind) =
mid: obj.mid,
postfix: obj.postfix,
)

# n.comment = obj.comment # shouldn't be needed, the address doesnt' change
when defined(useNodeIds):
n.id = obj.id
Expand Down Expand Up @@ -2487,6 +2490,7 @@ proc toObjectFromRefPtrGeneric*(typ: PType): PType =
A3 = ref object f2: int
A4 = object f3: int
]#

result = typ
while true:
case result.kind
Expand Down Expand Up @@ -2571,7 +2575,6 @@ proc newProcType*(info: TLineInfo; id: ItemId; owner: PSym): PType =
result.n = newNodeI(nkFormalParams, info)

rawAddSon(result, nil) # return type

# result.n[0] used to be `nkType`, but now it's `nkEffectList` because
# the effects are now stored in there too ... this is a bit hacky, but as
# usual we desperately try to save memory:
Expand Down
4 changes: 0 additions & 4 deletions src/phastalgo.nim
Original file line number Diff line number Diff line change
Expand Up @@ -274,13 +274,11 @@ proc sameIgnoreBacktickGensymInfo(a, b: string): bool =

if aa != bb:
return false

# the characters are identical:
if i >= alen:
# both cursors at the end:
if j >= b.len:
return true

# not yet at the end of 'b':
return false
elif j >= b.len:
Expand Down Expand Up @@ -845,7 +843,6 @@ proc debug(n: PNode; conf: ConfigRef) =
var this: DebugPrinter

this.visited = initTable[pointer, int]()

#this.renderSymType = true
this.useColor = not defined(windows)

Expand Down Expand Up @@ -994,7 +991,6 @@ proc strTableInclReportConflict*(
var it = t.data[h]
if it == nil:
break

# Semantic checking can happen multiple times thanks to templates
# and overloading: (var x=@[]; x).mapIt(it).
# So it is possible the very same sym is added multiple
Expand Down
28 changes: 14 additions & 14 deletions src/phlexer.nim
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ const
'@', '~', ':'
}
UnaryMinusWhitelist = {' ', '\t', '\n', '\r', ',', ';', '(', '[', '{'}

# don't forget to update the 'highlite' module if these charsets should change

type
TokType* = enum
tkInvalid = "tkInvalid"
Expand Down Expand Up @@ -192,16 +192,20 @@ type
base*: NumericalBase
# the numerical base; only valid for int
# or float literals

spacing*: set[TokenSpacing] # spaces around token
indent*: int
# the indentation; != -1 if the token has been
# preceded with indentation

ident*: PIdent # the parsed identifier
iNumber*: BiggestInt # the parsed integer literal
fNumber*: BiggestFloat # the parsed floating point literal
literal*: string
# the parsed (string) literal; and
# documentation comments are here too

prevLine*: uint16 # line at which the previous token ended
line*, col*: int
offsetA*, offsetB*: int
# used for pretty printing so that literals
Expand All @@ -214,9 +218,12 @@ type
# if > 0 an indentation has already been read
# this is needed because scanning comments
# needs so much look-ahead

currLineIndent*: int
errorHandler*: ErrorHandler
cache*: IdentCache
tokenEnd*: TLineInfo
previousTokenEnd*: TLineInfo
config*: ConfigRef
printTokens: bool

Expand All @@ -232,6 +239,7 @@ template ones(n): untyped =
((1 shl n) - 1)

# for utf-8 conversion

proc isNimIdentifier*(s: string): bool =
let sLen = s.len
if sLen > 0 and s[0] in SymStartChars:
Expand Down Expand Up @@ -413,7 +421,6 @@ proc getNumber(L: var Lexer; result: var Token) =
L.bufpos = startpos # Use L.bufpos as pos because of matchChars

matchChars(L, t, literalishChars)

# We must verify +/- specifically so that we're not past the literal
if L.buf[L.bufpos] in {'+', '-'} and L.buf[L.bufpos - 1] in {'e', 'E'}:
t.literal.add(L.buf[L.bufpos])
Expand Down Expand Up @@ -522,7 +529,6 @@ proc getNumber(L: var Lexer; result: var Token) =
discard matchUnderscoreChars(L, result, {'0' .. '9'})

let endpos = L.bufpos

# Second stage, find out if there's a datatype suffix and handle it
var postPos = endpos
if L.buf[postPos] in {'\'', 'f', 'F', 'd', 'D', 'i', 'I', 'u', 'U'}:
Expand Down Expand Up @@ -581,7 +587,6 @@ proc getNumber(L: var Lexer; result: var Token) =
lexMessageLitNum(L, "invalid number suffix: '$1'", errPos)
else:
lexMessageLitNum(L, "invalid number suffix: '$1'", errPos)

# Is there still a literalish char awaiting? Then it's an error!
if L.buf[postPos] in literalishChars or
(L.buf[postPos] == '.' and L.buf[postPos + 1] in {'0' .. '9'}):
Expand Down Expand Up @@ -724,7 +729,6 @@ proc getNumber(L: var Lexer; result: var Token) =

if outOfRange:
lexMessageLitNum(L, "number out of range: '$1'", startpos)

# Promote int literal to int64? Not always necessary, but more consistent
if result.tokType == tkIntLit:
if result.iNumber > high(int32) or result.iNumber < low(int32):
Expand Down Expand Up @@ -762,13 +766,11 @@ proc handleHexChar(L: var Lexer; xi: var int; position: range[0 .. 4]) =
of '"', '\'':
if position <= 1:
invalid()

# do not progress the bufpos here.
if position == 0:
inc(L.bufpos)
else:
invalid()

# Need to progress for `nim check`
inc(L.bufpos)

Expand All @@ -780,7 +782,6 @@ proc handleDecChars(L: var Lexer; xi: var int) =

proc addUnicodeCodePoint(s: var string; i: int) =
let i = cast[uint](i)

# inlined toUTF-8 to avoid unicode and strutils dependencies.
let pos = s.len
if i <= 127:
Expand Down Expand Up @@ -945,7 +946,6 @@ proc getString(L: var Lexer; tok: var Token; mode: StringMode) =
tok.tokType = tkTripleStrLit # long string literal:

inc(pos, 2) # skip ""

# skip leading newline:
if L.buf[pos] in {' ', '\t'}:
var newpos = pos + 1
Expand Down Expand Up @@ -1063,7 +1063,6 @@ proc getCharacter(L: var Lexer; tok: var Token) =
tokenEndIgnore(tok, L.bufpos - 1)

const UnicodeOperatorStartChars = {'\226', '\194', '\195'}

# the allowed unicode characters ("∙ ∘ × ★ ⊗ ⊘ ⊙ ⊛ ⊠ ⊡ ∩ ∧ ⊓ ± ⊕ ⊖ ⊞ ⊟ ∪ ∨ ⊔")
# all start with one of these.

Expand Down Expand Up @@ -1221,7 +1220,6 @@ proc getOperator(L: var Lexer; tok: var Token) =

endOperator(L, tok, pos, h)
tokenEnd(tok, pos - 1)

# advance pos but don't store it in L.bufpos so the next token (which might
# be an operator too) gets the preceding spaces:
tok.spacing = tok.spacing - {tsTrailing, tsEof}
Expand All @@ -1246,7 +1244,6 @@ proc getPrecedence*(tok: Token): int =
case tok.tokType
of tkOpr:
let relevantChar = tok.ident.s[0]

# arrow like?
if tok.ident.s.len > 1 and tok.ident.s[^1] == '>' and
tok.ident.s[^2] in {'-', '~', '='}:
Expand Down Expand Up @@ -1426,10 +1423,13 @@ proc skip(L: var Lexer; tok: var Token) =

proc rawGetTok*(L: var Lexer; tok: var Token) =
template atTokenEnd() {.dirty.} =
discard
L.previousTokenEnd.line = L.tokenEnd.line
L.previousTokenEnd.col = L.tokenEnd.col
L.tokenEnd.line = tok.line.uint16
L.tokenEnd.col = getColNumber(L, L.bufpos).int16

# TODO nimsuggest leftover
reset(tok)
tok.prevLine = L.tokenEnd.line

tok.indent = -1

Expand Down
7 changes: 5 additions & 2 deletions src/phlineinfos.nim
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
import "$nim"/compiler/[ropes, pathutils], std/[hashes, tables]

const explanationsBaseUrl* = "https://nim-lang.github.io/Nim"

# was: "https://nim-lang.org/docs" but we're now usually showing devel docs
# instead of latest release docs.

Expand Down Expand Up @@ -333,18 +332,22 @@ type
quotedName*: Rope
# cached quoted short name for codegen
# purposes

quotedFullName*: Rope
# cached quoted full name for codegen
# purposes

lines*: seq[string]
# the source code of the module
# used for better error messages and
# embedding the original source in the
# generated code

dirtyFile*: AbsoluteFile
# the file that is actually read into memory
# and parsed; usually "" but is used
# for 'nimsuggest'

hash*: string # the checksum of the file
dirty*: bool # for 'nimpretty' like tooling
fullContent*: string
Expand All @@ -368,7 +371,6 @@ type

TErrorOutputs* = set[TErrorOutput]
ERecoverableError* = object of ValueError

ESuggestDone* = object of ValueError

proc `==`*(a, b: FileIndex): bool {.borrow.}
Expand Down Expand Up @@ -399,6 +401,7 @@ type MsgConfig* = object ## does not need to be stored in the incremental cache
trackPosAttached*: bool
## whether the tracking position was attached to
## some close token.

errorOutputs*: TErrorOutputs
msgContext*: seq[tuple[info: TLineInfo, detail: string]]
lastError*: TLineInfo
Expand Down
Loading

0 comments on commit ab8f57b

Please sign in to comment.