the comment

Improve comment handling by making sure that comment tokens are skipped consistently, postfixed at the "outer" level as much as possible and generally don't go missing if they get attached to the wrong node. * keep better track of end-of-token information for better empty line preservation * fix several cases of swallowed mid comments * fix several cases of unnecessarily re-indented comments * more regular line break after proc-with-body and friends
arnetheduck · Dec 14, 2023 · ab8f57b · ab8f57b
1 parent 703133b
commit ab8f57b
Show file tree

Hide file tree

Showing 24 changed files with 1,009 additions and 684 deletions.
diff --git a/nph.nimble b/nph.nimble
@@ -29,11 +29,14 @@ task self, "Format nph itself":
       echo file
       exec "./nph " & file
 
+import std/algorithm
 task f, "Format":
   build()
 
   cd "tests/before"
-  for file in listFiles("."):
+  # Sort tests so that 00_empty always is first, which makes it a convenient
+  # experimentation ground :)
+  for file in sorted(listFiles(".")):
     if file.len > 4 and file[^4..^1] == ".nim":
       echo file
       exec "../../nph " & file & " --outDir:../after --debug"
diff --git a/src/astcmp.nim b/src/astcmp.nim
@@ -47,7 +47,6 @@ proc equivalent*(a, b: PNode): Outcome =
       # TODO the positive and negative ranges of integers are not the same - is this a problem?
       #      what about more complex expressions?
       return Outcome(kind: Same)
-
     # runnableExamples: static: ... turns into a staticStmt when broken up in
     # lines (!)
     if a.kind == nkCall and b.kind == nkStaticStmt and a.sons.len > 1 and

diff --git a/src/phast.nim b/src/phast.nim
@@ -1040,8 +1040,7 @@ type
       ident*: PIdent
     else:
       sons*: TNodeSeq
-    when defined(nimsuggest):
-      endInfo*: TLineInfo
+    endInfo*: TLineInfo
     prefix*: seq[Token] # comments leading up to this node
     mid*: seq[Token] # comments in the middle of the node
     postfix*: seq[Token] # comments after the node
@@ -1156,6 +1155,7 @@ type
       # for modules, it's a placeholder for compiler
       # generated code that will be appended to the
       # module after the sem pass (see appendToModule)
+
     options*: TOptions
     position*: int
       # used for many different things:
@@ -1168,14 +1168,17 @@ type
       # for modules, an unique index corresponding
       # to the module's fileIdx
       # for variables a slot index for the evaluator
+
     offset*: int32 # offset of record field
     disamb*: int32
       # disambiguation number; the basic idea is that
       # `<procname>__<module>_<disamb>`
+
     loc*: TLoc
     annex*: PLib
       # additional fields (seldom used, so we use a
       # reference to another object to save space)
+
     when hasFFI:
       cname*: string
         # resolved C declaration name in importc decl, e.g.:
@@ -1186,6 +1189,7 @@ type
       # it won't cause problems
       # for skModule the string literal to output for
       # deprecated modules.
+
     when defined(nimsuggest):
       allUsages*: seq[TLineInfo]
 
@@ -1219,19 +1223,23 @@ type
       # formal param list
       # for concepts, the concept body
       # else: unused
+
     owner*: PSym # the 'owner' of the type
     sym*: PSym
       # types have the sym associated with them
       # it is used for converting types to strings
+
     size*: BiggestInt
       # the size of the type in bytes
       # -1 means that the size is unkwown
+
     align*: int16 # the type's alignment requirements
     paddingAtEnd*: int16 #
     loc*: TLoc
     typeInst*: PType
       # for generic instantiations the tyGenericInst that led to this
       # type.
+
     uniqueId*: ItemId
       # due to a design mistake, we need to keep the real ID here as it
       # is required by the --incremental:on mode.
@@ -1560,6 +1568,7 @@ proc getDeclPragma*(n: PNode): PNode =
         Empty
         Ident "int"
     ]#
+
     if n[0].kind == nkPragmaExpr:
       result = n[0][1]
   else:
@@ -1945,7 +1954,6 @@ proc assignType*(dest, src: PType) =
   dest.n = src.n
   dest.size = src.size
   dest.align = src.align
-
   # this fixes 'type TLock = TSysLock':
   if src.sym != nil:
     if dest.sym != nil:
@@ -1973,7 +1981,6 @@ proc exactReplica*(t: PType): PType =
 
 proc copySym*(s: PSym; idgen: IdGenerator): PSym =
   result = newSym(s.kind, s.name, idgen, s.owner, s.info, s.options)
-
   #result.ast = nil            # BUGFIX; was: s.ast which made problems
   result.typ = s.typ
   result.flags = s.flags
@@ -1992,10 +1999,8 @@ proc createModuleAlias*(
     s: PSym; idgen: IdGenerator; newIdent: PIdent; info: TLineInfo; options: TOptions
 ): PSym =
   result = newSym(s.kind, newIdent, idgen, s.owner, info, options)
-
   # keep ID!
   result.ast = s.ast
-
   #result.id = s.id # XXX figure out what to do with the ID.
   result.flags = s.flags
   result.options = s.options
@@ -2021,7 +2026,6 @@ proc newIdTable*(): TIdTable =
 
 proc resetIdTable*(x: var TIdTable) =
   x.counter = 0
-
   # clear and set to old initial size:
   setLen(x.data, 0)
   setLen(x.data, StartSize)
@@ -2153,7 +2157,6 @@ template transitionNodeKindCommon(k: TNodeKind) =
       mid: obj.mid,
       postfix: obj.postfix,
     )
-
   # n.comment = obj.comment # shouldn't be needed, the address doesnt' change
   when defined(useNodeIds):
     n.id = obj.id
@@ -2487,6 +2490,7 @@ proc toObjectFromRefPtrGeneric*(typ: PType): PType =
     A3 = ref object f2: int
     A4 = object f3: int
   ]#
+
   result = typ
   while true:
     case result.kind
@@ -2571,7 +2575,6 @@ proc newProcType*(info: TLineInfo; id: ItemId; owner: PSym): PType =
   result.n = newNodeI(nkFormalParams, info)
 
   rawAddSon(result, nil) # return type
-
   # result.n[0] used to be `nkType`, but now it's `nkEffectList` because
   # the effects are now stored in there too ... this is a bit hacky, but as
   # usual we desperately try to save memory:

diff --git a/src/phastalgo.nim b/src/phastalgo.nim
@@ -274,13 +274,11 @@ proc sameIgnoreBacktickGensymInfo(a, b: string): bool =
 
     if aa != bb:
       return false
-
     # the characters are identical:
     if i >= alen:
       # both cursors at the end:
       if j >= b.len:
         return true
-
       # not yet at the end of 'b':
       return false
     elif j >= b.len:
@@ -845,7 +843,6 @@ proc debug(n: PNode; conf: ConfigRef) =
   var this: DebugPrinter
 
   this.visited = initTable[pointer, int]()
-
   #this.renderSymType = true
   this.useColor = not defined(windows)
 
@@ -994,7 +991,6 @@ proc strTableInclReportConflict*(
     var it = t.data[h]
     if it == nil:
       break
-
     # Semantic checking can happen multiple times thanks to templates
     # and overloading: (var x=@[]; x).mapIt(it).
     # So it is possible the very same sym is added multiple

diff --git a/src/phlexer.nim b/src/phlexer.nim
@@ -43,8 +43,8 @@ const
       '@', '~', ':'
     }
   UnaryMinusWhitelist = {' ', '\t', '\n', '\r', ',', ';', '(', '[', '{'}
-
 # don't forget to update the 'highlite' module if these charsets should change
+
 type
   TokType* = enum
     tkInvalid = "tkInvalid"
@@ -192,16 +192,20 @@ type
     base*: NumericalBase
       # the numerical base; only valid for int
       # or float literals
+
     spacing*: set[TokenSpacing] # spaces around token
     indent*: int
       # the indentation; != -1 if the token has been
       # preceded with indentation
+
     ident*: PIdent # the parsed identifier
     iNumber*: BiggestInt # the parsed integer literal
     fNumber*: BiggestFloat # the parsed floating point literal
     literal*: string
       # the parsed (string) literal; and
       # documentation comments are here too
+
+    prevLine*: uint16 # line at which the previous token ended
     line*, col*: int
     offsetA*, offsetB*: int
       # used for pretty printing so that literals
@@ -214,9 +218,12 @@ type
       # if > 0 an indentation has already been read
       # this is needed because scanning comments
       # needs so much look-ahead
+
     currLineIndent*: int
     errorHandler*: ErrorHandler
     cache*: IdentCache
+    tokenEnd*: TLineInfo
+    previousTokenEnd*: TLineInfo
     config*: ConfigRef
     printTokens: bool
 
@@ -232,6 +239,7 @@ template ones(n): untyped =
   ((1 shl n) - 1)
 
 # for utf-8 conversion
+
 proc isNimIdentifier*(s: string): bool =
   let sLen = s.len
   if sLen > 0 and s[0] in SymStartChars:
@@ -413,7 +421,6 @@ proc getNumber(L: var Lexer; result: var Token) =
     L.bufpos = startpos # Use L.bufpos as pos because of matchChars
 
     matchChars(L, t, literalishChars)
-
     # We must verify +/- specifically so that we're not past the literal
     if L.buf[L.bufpos] in {'+', '-'} and L.buf[L.bufpos - 1] in {'e', 'E'}:
       t.literal.add(L.buf[L.bufpos])
@@ -522,7 +529,6 @@ proc getNumber(L: var Lexer; result: var Token) =
       discard matchUnderscoreChars(L, result, {'0' .. '9'})
 
   let endpos = L.bufpos
-
   # Second stage, find out if there's a datatype suffix and handle it
   var postPos = endpos
   if L.buf[postPos] in {'\'', 'f', 'F', 'd', 'D', 'i', 'I', 'u', 'U'}:
@@ -581,7 +587,6 @@ proc getNumber(L: var Lexer; result: var Token) =
         lexMessageLitNum(L, "invalid number suffix: '$1'", errPos)
     else:
       lexMessageLitNum(L, "invalid number suffix: '$1'", errPos)
-
   # Is there still a literalish char awaiting? Then it's an error!
   if L.buf[postPos] in literalishChars or
       (L.buf[postPos] == '.' and L.buf[postPos + 1] in {'0' .. '9'}):
@@ -724,7 +729,6 @@ proc getNumber(L: var Lexer; result: var Token) =
 
         if outOfRange:
           lexMessageLitNum(L, "number out of range: '$1'", startpos)
-
       # Promote int literal to int64? Not always necessary, but more consistent
       if result.tokType == tkIntLit:
         if result.iNumber > high(int32) or result.iNumber < low(int32):
@@ -762,13 +766,11 @@ proc handleHexChar(L: var Lexer; xi: var int; position: range[0 .. 4]) =
   of '"', '\'':
     if position <= 1:
       invalid()
-
     # do not progress the bufpos here.
     if position == 0:
       inc(L.bufpos)
   else:
     invalid()
-
     # Need to progress for `nim check`
     inc(L.bufpos)
 
@@ -780,7 +782,6 @@ proc handleDecChars(L: var Lexer; xi: var int) =
 
 proc addUnicodeCodePoint(s: var string; i: int) =
   let i = cast[uint](i)
-
   # inlined toUTF-8 to avoid unicode and strutils dependencies.
   let pos = s.len
   if i <= 127:
@@ -945,7 +946,6 @@ proc getString(L: var Lexer; tok: var Token; mode: StringMode) =
     tok.tokType = tkTripleStrLit # long string literal:
 
     inc(pos, 2) # skip ""
-
     # skip leading newline:
     if L.buf[pos] in {' ', '\t'}:
       var newpos = pos + 1
@@ -1063,7 +1063,6 @@ proc getCharacter(L: var Lexer; tok: var Token) =
     tokenEndIgnore(tok, L.bufpos - 1)
 
 const UnicodeOperatorStartChars = {'\226', '\194', '\195'}
-
 # the allowed unicode characters ("∙ ∘ × ★ ⊗ ⊘ ⊙ ⊛ ⊠ ⊡ ∩ ∧ ⊓ ± ⊕ ⊖ ⊞ ⊟ ∪ ∨ ⊔")
 # all start with one of these.
 
@@ -1221,7 +1220,6 @@ proc getOperator(L: var Lexer; tok: var Token) =
 
   endOperator(L, tok, pos, h)
   tokenEnd(tok, pos - 1)
-
   # advance pos but don't store it in L.bufpos so the next token (which might
   # be an operator too) gets the preceding spaces:
   tok.spacing = tok.spacing - {tsTrailing, tsEof}
@@ -1246,7 +1244,6 @@ proc getPrecedence*(tok: Token): int =
   case tok.tokType
   of tkOpr:
     let relevantChar = tok.ident.s[0]
-
     # arrow like?
     if tok.ident.s.len > 1 and tok.ident.s[^1] == '>' and
         tok.ident.s[^2] in {'-', '~', '='}:
@@ -1426,10 +1423,13 @@ proc skip(L: var Lexer; tok: var Token) =
 
 proc rawGetTok*(L: var Lexer; tok: var Token) =
   template atTokenEnd() {.dirty.} =
-    discard
+    L.previousTokenEnd.line = L.tokenEnd.line
+    L.previousTokenEnd.col = L.tokenEnd.col
+    L.tokenEnd.line = tok.line.uint16
+    L.tokenEnd.col = getColNumber(L, L.bufpos).int16
 
-  # TODO nimsuggest leftover
   reset(tok)
+  tok.prevLine = L.tokenEnd.line
 
   tok.indent = -1
 

diff --git a/src/phlineinfos.nim b/src/phlineinfos.nim
@@ -14,7 +14,6 @@
 import "$nim"/compiler/[ropes, pathutils], std/[hashes, tables]
 
 const explanationsBaseUrl* = "https://nim-lang.github.io/Nim"
-
 # was: "https://nim-lang.org/docs" but we're now usually showing devel docs
 # instead of latest release docs.
 
@@ -333,18 +332,22 @@ type
     quotedName*: Rope
       # cached quoted short name for codegen
       # purposes
+
     quotedFullName*: Rope
       # cached quoted full name for codegen
       # purposes
+
     lines*: seq[string]
       # the source code of the module
       #   used for better error messages and
       #   embedding the original source in the
       #   generated code
+
     dirtyFile*: AbsoluteFile
       # the file that is actually read into memory
       # and parsed; usually "" but is used
       # for 'nimsuggest'
+
     hash*: string # the checksum of the file
     dirty*: bool # for 'nimpretty' like tooling
     fullContent*: string
@@ -368,7 +371,6 @@ type
 
   TErrorOutputs* = set[TErrorOutput]
   ERecoverableError* = object of ValueError
-
   ESuggestDone* = object of ValueError
 
 proc `==`*(a, b: FileIndex): bool {.borrow.}
@@ -399,6 +401,7 @@ type MsgConfig* = object ## does not need to be stored in the incremental cache
   trackPosAttached*: bool
     ## whether the tracking position was attached to
     ## some close token.
+
   errorOutputs*: TErrorOutputs
   msgContext*: seq[tuple[info: TLineInfo, detail: string]]
   lastError*: TLineInfo