From 9c6d97cc3be72f6e337ef0d066e0813f08280ca6 Mon Sep 17 00:00:00 2001
From: Yuri Romanowski <yuri.romanowski@serokell.io>
Date: Tue, 13 Dec 2022 16:17:00 +0500
Subject: [PATCH 1/3] [#64] Refactor the markdown scanner

Problem: Current implementation of the markdown scanner is hard
to extend, so we need to refactor it to add support for new annotations.

Solution: Refactor; improve handling annotations, remove IMSAll state
as it's not required, rename functions.
---
 src/Xrefcheck/Core.hs                         |   2 +-
 src/Xrefcheck/Scanners/Markdown.hs            | 174 ++++++++++--------
 tests/Test/Xrefcheck/IgnoreAnnotationsSpec.hs |   2 +-
 tests/golden/check-scan-errors/expected.gold  |   2 +-
 4 files changed, 98 insertions(+), 82 deletions(-)

diff --git a/src/Xrefcheck/Core.hs b/src/Xrefcheck/Core.hs
index 5a9d58c6..9c3ae450 100644
--- a/src/Xrefcheck/Core.hs
+++ b/src/Xrefcheck/Core.hs
@@ -15,9 +15,9 @@ import Control.Lens (makeLenses)
 import Data.Aeson (FromJSON (..), withText)
 import Data.Char (isAlphaNum)
 import Data.Char qualified as C
-import Data.Default (Default (..))
 import Data.DList (DList)
 import Data.DList qualified as DList
+import Data.Default (Default (..))
 import Data.List qualified as L
 import Data.Reflection (Given)
 import Data.Text qualified as T
diff --git a/src/Xrefcheck/Scanners/Markdown.hs b/src/Xrefcheck/Scanners/Markdown.hs
index 14f68275..6bcd5b60 100644
--- a/src/Xrefcheck/Scanners/Markdown.hs
+++ b/src/Xrefcheck/Scanners/Markdown.hs
@@ -102,26 +102,24 @@ data IgnoreLinkState
   -- and we should change `IgnoreLinkState`, because it's not a problem if
   -- our node's first child doesn't contain a link. So this status means that
   -- we won't throw errors if we don't find a link for now
-  deriving stock (Eq)
+  deriving stock (Eq, Show)
 
 data IgnoreModeState
   = IMSLink IgnoreLinkState
   | IMSParagraph
-  | IMSAll
-  deriving stock (Eq)
+  deriving stock (Eq, Show)
 
 -- | Bind `IgnoreMode` to its `PosInfo` so that we can tell where the
 -- corresponding annotation was declared.
 data Ignore = Ignore
   { _ignoreMode :: IgnoreModeState
   , _ignorePos :: Maybe PosInfo
-  }
+  } deriving stock (Show)
 makeLensesFor [("_ignoreMode", "ignoreMode")] 'Ignore
 
-data GetIgnoreMode
-  = NotAnAnnotation
-  | ValidMode IgnoreMode
-  | InvalidMode Text
+data GetAnnotation
+  = IgnoreAnnotation IgnoreMode
+  | InvalidAnnotation Text
   deriving stock (Eq)
 
 
@@ -167,48 +165,43 @@ removeIgnored fp = withIgnoreMode . cataNodeWithParentNodeInfo remove
       -> ScannerM Node
     remove pos ty subs = do
       let node = Node pos ty []
-      scan <- use ssIgnore >>= \case
+      scan <- use ssIgnore >>= \e -> do
         -- When no `Ignore` state is set check next node for annotation,
         -- if found then set it as new `IgnoreMode` otherwise skip node.
-        Nothing                    -> handleIgnoreMode pos ty subs $ getIgnoreMode node
-        Just (Ignore mode modePos) ->
-          case (mode, ty) of
-            -- We expect to find a paragraph immediately after the
-            -- `ignore paragraph` annotanion. If the paragraph is not
-            -- found we should report an error.
-            (IMSParagraph, PARAGRAPH)    -> (ssIgnore .= Nothing) $> defNode
-            (IMSParagraph, x)            -> do
-              lift . tell . makeError modePos fp . ParagraphErr $ prettyType x
-              ssIgnore .= Nothing
-              Node pos ty <$> sequence subs
-
-            -- We don't expect to find an `ignore all` annotation here,
-            -- since that annotation should be at the top of the file and
-            -- the file should already be ignored when `checkIgnoreFile` is called.
-            -- We should report an error if we find it anyway.
-            (IMSAll, _)                 -> do
-              lift . tell $ makeError modePos fp FileErr
-              ssIgnore .= Nothing
-              Node pos ty <$> sequence subs
-
-            (IMSLink _, LINK {})         -> do
-              ssIgnore .= Nothing
-              return defNode
-            (IMSLink _, IMAGE {})        -> do
-              ssIgnore .= Nothing
-              return defNode
-            (IMSLink ignoreLinkState, _) -> do
-              when (ignoreLinkState == ExpectingLinkInSubnodes) $
-                ssIgnore . _Just . ignoreMode .=  IMSLink ParentExpectsLink
-              node' <- Node pos ty <$> sequence subs
-              when (ignoreLinkState == ExpectingLinkInSubnodes) $ do
-                currentIgnore <- use ssIgnore
-                case currentIgnore of
-                  Just (Ignore {_ignoreMode = IMSLink ParentExpectsLink}) -> do
-                    lift $ tell $ makeError modePos fp LinkErr
+        let mbAnnotation = getAnnotation node
+        case mbAnnotation of
+          Just ann -> handleAnnotation pos ty ann
+          Nothing -> case e of
+            Nothing                    -> Node pos ty <$> sequence subs
+            Just (Ignore mode modePos) ->
+                case (mode, ty) of
+                  -- We expect to find a paragraph immediately after the
+                  -- `ignore paragraph` annotanion. If the paragraph is not
+                  -- found we should report an error.
+                  (IMSParagraph, PARAGRAPH)    -> (ssIgnore .= Nothing) $> defNode
+                  (IMSParagraph, x)            -> do
+                    lift . tell . makeError modePos fp . ParagraphErr $ prettyType x
+                    ssIgnore .= Nothing
+                    Node pos ty <$> sequence subs
+
+                  (IMSLink _, LINK {})         -> do
+                    ssIgnore .= Nothing
+                    return defNode
+                  (IMSLink _, IMAGE {})        -> do
                     ssIgnore .= Nothing
-                  _ -> pass
-              return node'
+                    return defNode
+                  (IMSLink ignoreLinkState, _) -> do
+                    when (ignoreLinkState == ExpectingLinkInSubnodes) $
+                      ssIgnore . _Just . ignoreMode .=  IMSLink ParentExpectsLink
+                    node' <- Node pos ty <$> sequence subs
+                    when (ignoreLinkState == ExpectingLinkInSubnodes) $ do
+                      currentIgnore <- use ssIgnore
+                      case currentIgnore of
+                        Just (Ignore {_ignoreMode = IMSLink ParentExpectsLink}) -> do
+                          lift $ tell $ makeError modePos fp LinkErr
+                          ssIgnore .= Nothing
+                        _ -> pass
+                    return node'
 
       when (ty == PARAGRAPH) $ use ssIgnore >>= \case
         Just (Ignore (IMSLink ExpectingLinkInParagraph) pragmaPos) ->
@@ -217,28 +210,47 @@ removeIgnored fp = withIgnoreMode . cataNodeWithParentNodeInfo remove
 
       return scan
 
-    handleIgnoreMode
+    handleAnnotation
       :: Maybe PosInfo
       -> NodeType
-      -> [ScannerM Node]
-      -> GetIgnoreMode
+      -> GetAnnotation
       -> ScannerM Node
-    handleIgnoreMode pos nodeType subs = \case
-      ValidMode mode  -> do
-        ignoreModeState <- case mode of
-          IMLink -> use ssParentNodeType <&> IMSLink . \case
-             Just PARAGRAPH -> ExpectingLinkInParagraph
-             _ -> ExpectingLinkInSubnodes
-
-          IMParagraph -> pure IMSParagraph
-
-          IMAll -> pure IMSAll
-
-        (ssIgnore .= Just (Ignore ignoreModeState correctPos)) $> defNode
-      InvalidMode msg -> do
+    handleAnnotation pos nodeType = \case
+      IgnoreAnnotation mode  -> do
+        let reportIfThereWasAnnotation :: ScannerM ()
+            reportIfThereWasAnnotation = do
+              curIgnore <- use ssIgnore
+              whenJust curIgnore $ \case
+                Ignore IMSParagraph prevPos ->
+                  lift . tell . makeError prevPos fp . ParagraphErr $ prettyType nodeType
+                Ignore (IMSLink _) prevPos ->
+                  lift $ tell $ makeError prevPos fp LinkErr
+
+        mbIgnoreModeState <- case mode of
+          IMLink -> do
+            reportIfThereWasAnnotation
+            use ssParentNodeType <&> Just . IMSLink . \case
+              Just PARAGRAPH -> ExpectingLinkInParagraph
+              _ -> ExpectingLinkInSubnodes
+
+          IMParagraph -> do
+            reportIfThereWasAnnotation
+            pure $ Just IMSParagraph
+
+          -- We don't expect to find an `ignore all` annotation here,
+          -- since that annotation should be at the top of the file and
+          -- the file should already be ignored when `checkIgnoreFile` is called.
+          -- We should report an error if we find it anyway.
+          IMAll -> do
+            lift . tell $ makeError correctPos fp FileErr
+            pure Nothing
+
+        whenJust mbIgnoreModeState $ \ignoreModeState ->
+          (ssIgnore .= Just (Ignore ignoreModeState correctPos))
+        pure defNode
+      InvalidAnnotation msg -> do
         lift . tell $ makeError correctPos fp $ UnrecognisedErr msg
-        (ssIgnore .= Nothing) $> defNode
-      NotAnAnnotation -> Node pos nodeType <$> sequence subs
+        pure defNode
       where
         correctPos = getPosition $ Node pos nodeType []
 
@@ -261,9 +273,6 @@ removeIgnored fp = withIgnoreMode . cataNodeWithParentNodeInfo remove
         IMSLink _ -> do
             tell $ makeError pos fp LinkErr
             pure node
-        IMSAll -> do
-            tell $ makeError pos fp FileErr
-            pure node
       (node, _) -> pure node
 
 -- | Custom `foldMap` for source tree.
@@ -354,7 +363,7 @@ checkIgnoreAllFile nodes =
     isComment = isJust . getCommentContent
 
     isIgnoreFile :: Node -> Bool
-    isIgnoreFile = (ValidMode IMAll ==) . getIgnoreMode
+    isIgnoreFile = (Just (IgnoreAnnotation IMAll) ==) . getAnnotation
 
 defNode :: Node
 defNode = Node Nothing DOCUMENT [] -- hard-coded default Node
@@ -395,16 +404,23 @@ getPosition node@(Node pos _ _) = do
   pure $ PosInfo sl sc sl (sc + annLength - 1)
 
 -- | Extract `IgnoreMode` if current node is xrefcheck annotation.
-getIgnoreMode :: Node -> GetIgnoreMode
-getIgnoreMode node = maybe NotAnAnnotation (textToMode . words) (getXrefcheckContent node)
-
-textToMode :: [Text] -> GetIgnoreMode
-textToMode ("ignore" : [x])
-  | x == "link"      = ValidMode IMLink
-  | x == "paragraph" = ValidMode IMParagraph
-  | x == "all"      = ValidMode IMAll
-  | otherwise        = InvalidMode x
-textToMode _         = NotAnAnnotation
+getAnnotation :: Node -> Maybe GetAnnotation
+getAnnotation node = getXrefcheckContent node <&> textToMode
+
+textToMode :: Text -> GetAnnotation
+textToMode annText = case wordsList of
+  ("ignore" : [x])
+    | Just ignMode <- getIgnoreMode x -> IgnoreAnnotation ignMode
+  _ -> InvalidAnnotation annText
+  where
+    wordsList = words annText
+
+getIgnoreMode :: Text -> Maybe IgnoreMode
+getIgnoreMode = \case
+  "link" -> Just IMLink
+  "paragraph" -> Just IMParagraph
+  "all" -> Just IMAll
+  _ -> Nothing
 
 parseFileInfo :: MarkdownConfig -> FilePath -> LT.Text -> (FileInfo, [ScanError])
 parseFileInfo config fp input
diff --git a/tests/Test/Xrefcheck/IgnoreAnnotationsSpec.hs b/tests/Test/Xrefcheck/IgnoreAnnotationsSpec.hs
index 0eeebd3c..c905bb5c 100644
--- a/tests/Test/Xrefcheck/IgnoreAnnotationsSpec.hs
+++ b/tests/Test/Xrefcheck/IgnoreAnnotationsSpec.hs
@@ -34,7 +34,7 @@ test_ignoreAnnotations =
       , testCase "Check if broken unrecognised annotation produce error" do
           let file = "tests/markdowns/with-annotations/unrecognised_option.md"
           errs <- getErrs file
-          errs @?= makeError (Just $ PosInfo 7 1 7 46) file (UnrecognisedErr "unrecognised-option")
+          errs @?= makeError (Just $ PosInfo 7 1 7 46) file (UnrecognisedErr "ignore unrecognised-option")
       ]
   , testGroup "\"ignore link\" mode"
       [ testCase "Check \"ignore link\" performance" $ do
diff --git a/tests/golden/check-scan-errors/expected.gold b/tests/golden/check-scan-errors/expected.gold
index 252f7f53..a1933ffe 100644
--- a/tests/golden/check-scan-errors/expected.gold
+++ b/tests/golden/check-scan-errors/expected.gold
@@ -18,7 +18,7 @@
   ➥  In file check-scan-errors.md
      scan error at src:21:1-50:
 
-     Unrecognised option "unrecognised-annotation" perhaps you meant <"ignore link"|"ignore paragraph"|"ignore all">
+     Unrecognised option "ignore unrecognised-annotation" perhaps you meant <"ignore link"|"ignore paragraph"|"ignore all">
 
   ➥  In file check-second-file.md
      scan error at src:9:1-29:

From e2ef5dbf6ae4c424583d3693e91455a449e6b3b6 Mon Sep 17 00:00:00 2001
From: Yuri Romanowski <yuri.romanowski@serokell.io>
Date: Tue, 13 Dec 2022 19:01:25 +0500
Subject: [PATCH 2/3] [#64] Refactor the markdown scanner

Problem: Current implementation of the markdown scanner is hard
to extend, so we need to refactor it to add support for new annotations.

Solution: Refactor; isolate processing annotations for different
types of nodes.
---
 src/Xrefcheck/Scanners/Markdown.hs | 240 ++++++++++++++++++-----------
 1 file changed, 151 insertions(+), 89 deletions(-)

diff --git a/src/Xrefcheck/Scanners/Markdown.hs b/src/Xrefcheck/Scanners/Markdown.hs
index 6bcd5b60..8a053cbf 100644
--- a/src/Xrefcheck/Scanners/Markdown.hs
+++ b/src/Xrefcheck/Scanners/Markdown.hs
@@ -19,14 +19,15 @@ module Xrefcheck.Scanners.Markdown
 
 import Universum
 
-import CMarkGFM
-  (Node (..), NodeType (..), PosInfo (..), commonmarkToNode, extAutolink, optFootnotes)
+import CMarkGFM (NodeType (..), PosInfo (..), commonmarkToNode, extAutolink, optFootnotes)
+import CMarkGFM qualified as C
 import Control.Lens (_Just, makeLenses, makeLensesFor, (.=))
 import Control.Monad.Trans.Writer.CPS (Writer, runWriter, tell)
 import Data.Aeson (FromJSON (..), genericParseJSON)
 import Data.ByteString.Lazy qualified as BSL
 import Data.DList qualified as DList
 import Data.Default (def)
+import Data.List (span)
 import Data.Text qualified as T
 import Data.Text.Lazy qualified as LT
 import Fmt (Buildable (..), nameF)
@@ -49,8 +50,8 @@ defGithubMdConfig = MarkdownConfig
   { mcFlavor = GitHub
   }
 
-instance Buildable Node where
-  build (Node _mpos ty mSubs) = nameF (show ty) $
+instance Buildable C.Node where
+  build (C.Node _mpos ty mSubs) = nameF (show ty) $
     maybe "[]" interpolateBlockListF (nonEmpty mSubs)
 
 toPosition :: Maybe PosInfo -> Position
@@ -67,7 +68,7 @@ toPosition = Position . \case
         |]
 
 -- | Extract text from the topmost node.
-nodeExtractText :: Node -> Text
+nodeExtractText :: (C.Node) -> Text
 nodeExtractText = T.strip . mconcat . map extractText . nodeFlatten
   where
     extractText = \case
@@ -75,8 +76,8 @@ nodeExtractText = T.strip . mconcat . map extractText . nodeFlatten
       CODE t -> t
       _ -> ""
 
-    nodeFlatten :: Node -> [NodeType]
-    nodeFlatten (Node _pos ty subs) = ty : concatMap nodeFlatten subs
+    nodeFlatten :: (C.Node) -> [NodeType]
+    nodeFlatten (C.Node _pos ty subs) = ty : concatMap nodeFlatten subs
 
 
 data IgnoreMode
@@ -140,13 +141,13 @@ initialScannerState = ScannerState
 type ScannerM a = StateT ScannerState (Writer [ScanError]) a
 
 -- | A fold over a `Node`.
-cataNode :: (Maybe PosInfo -> NodeType -> [c] -> c) -> Node -> c
-cataNode f (Node pos ty subs) = f pos ty (cataNode f <$> subs)
+cataNode :: (Maybe PosInfo -> NodeType -> [c] -> c) -> C.Node -> c
+cataNode f (C.Node pos ty subs) = f pos ty (cataNode f <$> subs)
 
--- | Sets correct @_ssParentNodeType@ before running scanner on each node
+-- | Sets correct @_ssParentNodeType@ before running scanner on each node.
 cataNodeWithParentNodeInfo
   :: (Maybe PosInfo -> NodeType -> [ScannerM a] -> ScannerM a)
-  -> Node
+  -> C.Node
   -> ScannerM a
 cataNodeWithParentNodeInfo f node = cataNode f' node
   where
@@ -154,67 +155,118 @@ cataNodeWithParentNodeInfo f node = cataNode f' node
       map (ssParentNodeType .= Just ty >>) childScanners
 
 -- | Find ignore annotations (ignore paragraph and ignore link)
--- and remove nodes that should be ignored
-removeIgnored :: FilePath -> Node -> Writer [ScanError] Node
-removeIgnored fp = withIgnoreMode . cataNodeWithParentNodeInfo remove
+-- and remove nodes that should be ignored.
+processAnnotations :: FilePath -> C.Node -> Writer [ScanError] C.Node
+processAnnotations fp = withIgnoreMode . cataNodeWithParentNodeInfo process
   where
-    remove
+    process
       :: Maybe PosInfo
       -> NodeType
-      -> [ScannerM Node]
-      -> ScannerM Node
-    remove pos ty subs = do
-      let node = Node pos ty []
-      scan <- use ssIgnore >>= \e -> do
+      -> [ScannerM C.Node]
+      -> ScannerM C.Node
+    process pos ty subs = do
+      let node = C.Node pos ty []
+      use ssIgnore >>= \ign -> do
         -- When no `Ignore` state is set check next node for annotation,
         -- if found then set it as new `IgnoreMode` otherwise skip node.
         let mbAnnotation = getAnnotation node
         case mbAnnotation of
           Just ann -> handleAnnotation pos ty ann
-          Nothing -> case e of
-            Nothing                    -> Node pos ty <$> sequence subs
-            Just (Ignore mode modePos) ->
-                case (mode, ty) of
-                  -- We expect to find a paragraph immediately after the
-                  -- `ignore paragraph` annotanion. If the paragraph is not
-                  -- found we should report an error.
-                  (IMSParagraph, PARAGRAPH)    -> (ssIgnore .= Nothing) $> defNode
-                  (IMSParagraph, x)            -> do
-                    lift . tell . makeError modePos fp . ParagraphErr $ prettyType x
-                    ssIgnore .= Nothing
-                    Node pos ty <$> sequence subs
-
-                  (IMSLink _, LINK {})         -> do
-                    ssIgnore .= Nothing
-                    return defNode
-                  (IMSLink _, IMAGE {})        -> do
-                    ssIgnore .= Nothing
-                    return defNode
-                  (IMSLink ignoreLinkState, _) -> do
-                    when (ignoreLinkState == ExpectingLinkInSubnodes) $
-                      ssIgnore . _Just . ignoreMode .=  IMSLink ParentExpectsLink
-                    node' <- Node pos ty <$> sequence subs
-                    when (ignoreLinkState == ExpectingLinkInSubnodes) $ do
-                      currentIgnore <- use ssIgnore
-                      case currentIgnore of
-                        Just (Ignore {_ignoreMode = IMSLink ParentExpectsLink}) -> do
-                          lift $ tell $ makeError modePos fp LinkErr
-                          ssIgnore .= Nothing
-                        _ -> pass
-                    return node'
-
-      when (ty == PARAGRAPH) $ use ssIgnore >>= \case
+          Nothing -> do
+            case ty of
+              PARAGRAPH -> handleParagraph ign pos ty subs
+              LINK {}   -> handleLink      ign pos ty subs
+              IMAGE {}  -> handleLink      ign pos ty subs
+              _         -> handleOther     ign pos ty subs
+
+    handleLink ::
+      Maybe Ignore ->
+      Maybe PosInfo ->
+      NodeType ->
+      [ScannerM C.Node] ->
+      ScannerM C.Node
+    handleLink ign pos ty subs = do
+      let traverseChildren = C.Node pos ty <$> sequence subs
+      -- It can be checked that it's correct for all the cases
+      ssIgnore .= Nothing
+
+      case ign of
+        Nothing -> traverseChildren
+        Just (Ignore IMSParagraph modePos) -> do
+          reportExpectedParagraphAfterIgnoreAnnotation modePos ty
+          traverseChildren
+        Just (Ignore (IMSLink _) _) -> do
+          pure defNode
+
+    handleParagraph ::
+      Maybe Ignore ->
+      Maybe PosInfo ->
+      NodeType ->
+      [ScannerM C.Node] ->
+      ScannerM C.Node
+    handleParagraph ign pos ty subs = do
+      let traverseChildren = C.Node pos ty <$> sequence subs
+      node <- case ign of
+        Nothing -> traverseChildren
+        Just (Ignore IMSParagraph _) -> do
+          ssIgnore .= Nothing
+          pure defNode
+        Just (Ignore (IMSLink ignoreLinkState) modePos) ->
+          traverseNodeWithLinkExpected ignoreLinkState modePos pos ty subs
+
+      use ssIgnore >>= \case
         Just (Ignore (IMSLink ExpectingLinkInParagraph) pragmaPos) ->
           lift $ tell $ makeError pragmaPos fp LinkErr
         _ -> pass
-
-      return scan
+      pure node
+
+    handleOther ::
+      Maybe Ignore ->
+      Maybe PosInfo ->
+      NodeType ->
+      [ScannerM C.Node] ->
+      ScannerM C.Node
+    handleOther ign pos ty subs = do
+      let traverseChildren = C.Node pos ty <$> sequence subs
+
+      case ign of
+        Nothing -> traverseChildren
+        Just (Ignore IMSParagraph modePos) -> do
+          reportExpectedParagraphAfterIgnoreAnnotation modePos ty
+          ssIgnore .= Nothing
+          traverseChildren
+        Just (Ignore (IMSLink ignoreLinkState) modePos) -> do
+          traverseNodeWithLinkExpected ignoreLinkState modePos pos ty subs
+
+    reportExpectedParagraphAfterIgnoreAnnotation :: Maybe PosInfo -> NodeType -> ScannerM ()
+    reportExpectedParagraphAfterIgnoreAnnotation modePos ty =
+      lift . tell . makeError modePos fp . ParagraphErr $ prettyType ty
+
+    traverseNodeWithLinkExpected ::
+      IgnoreLinkState ->
+      Maybe PosInfo ->
+      Maybe PosInfo ->
+      NodeType ->
+      [ScannerM C.Node] ->
+      ScannerM C.Node
+    traverseNodeWithLinkExpected ignoreLinkState modePos pos ty subs = do
+      when (ignoreLinkState == ExpectingLinkInSubnodes) $
+        ssIgnore . _Just . ignoreMode .=  IMSLink ParentExpectsLink
+      node' <- C.Node pos ty <$> sequence subs
+      when (ignoreLinkState == ExpectingLinkInSubnodes) $ do
+        currentIgnore <- use ssIgnore
+        case currentIgnore of
+          Just (Ignore {_ignoreMode = IMSLink ParentExpectsLink}) -> do
+            lift $ tell $ makeError modePos fp LinkErr
+            ssIgnore .= Nothing
+          _ -> pass
+      return node'
 
     handleAnnotation
       :: Maybe PosInfo
       -> NodeType
       -> GetAnnotation
-      -> ScannerM Node
+      -> ScannerM C.Node
     handleAnnotation pos nodeType = \case
       IgnoreAnnotation mode  -> do
         let reportIfThereWasAnnotation :: ScannerM ()
@@ -252,7 +304,7 @@ removeIgnored fp = withIgnoreMode . cataNodeWithParentNodeInfo remove
         lift . tell $ makeError correctPos fp $ UnrecognisedErr msg
         pure defNode
       where
-        correctPos = getPosition $ Node pos nodeType []
+        correctPos = getPosition $ C.Node pos nodeType []
 
     prettyType :: NodeType -> Text
     prettyType ty =
@@ -260,8 +312,8 @@ removeIgnored fp = withIgnoreMode . cataNodeWithParentNodeInfo remove
       in fromMaybe "" mType
 
     withIgnoreMode
-      :: ScannerM Node
-      -> Writer [ScanError] Node
+      :: ScannerM C.Node
+      -> Writer [ScanError] C.Node
     withIgnoreMode action = action `runStateT` initialScannerState >>= \case
       -- We expect `Ignore` state to be `Nothing` when we reach EOF,
       -- otherwise that means there was an annotation that didn't match
@@ -276,8 +328,8 @@ removeIgnored fp = withIgnoreMode . cataNodeWithParentNodeInfo remove
       (node, _) -> pure node
 
 -- | Custom `foldMap` for source tree.
-foldNode :: (Monoid a, Monad m) => (Node -> m a) -> Node -> m a
-foldNode action node@(Node _ _ subs) = do
+foldNode :: (Monoid a, Monad m) => (C.Node -> m a) -> C.Node -> m a
+foldNode action node@(C.Node _ _ subs) = do
   a <- action node
   b <- concatForM subs (foldNode action)
   return (a <> b)
@@ -287,16 +339,19 @@ type ExtractorM a = ReaderT MarkdownConfig (Writer [ScanError]) a
 -- | Extract information from source tree.
 nodeExtractInfo
   :: FilePath
-  -> Node
+  -> C.Node
   -> ExtractorM FileInfo
-nodeExtractInfo fp input@(Node _ _ nSubs) = do
-  if checkIgnoreAllFile nSubs
+nodeExtractInfo fp (C.Node nPos nTy nSubs) = do
+  let (ignoreFile, contentNodes) = checkGlobalAnnotations nSubs
+  if ignoreFile
   then return def
-  else diffToFileInfo <$> (foldNode extractor =<< lift (removeIgnored fp input))
+  else diffToFileInfo <$>
+       (lift (processAnnotations fp $ C.Node nPos nTy contentNodes)
+         >>= foldNode extractor)
 
   where
-    extractor :: Node -> ExtractorM FileInfoDiff
-    extractor node@(Node pos ty _) =
+    extractor :: C.Node -> ExtractorM FileInfoDiff
+    extractor node@(C.Node pos ty _) =
       case ty of
         HTML_BLOCK _ -> do
           return mempty
@@ -349,24 +404,31 @@ nodeExtractInfo fp input@(Node _ _ nSubs) = do
            (DList.singleton $ Reference {rName, rPos, rLink, rAnchor})
            DList.empty
 
--- | Check if there is `ignore all` at the beginning of the file,
--- ignoring preceding comments if there are any.
-checkIgnoreAllFile :: [Node] -> Bool
-checkIgnoreAllFile nodes =
-  let isSimpleComment :: Node -> Bool
-      isSimpleComment node = isComment node && not (isIgnoreFile node)
-
-      mIgnoreFile = safeHead $ dropWhile isSimpleComment nodes
-  in maybe False isIgnoreFile mIgnoreFile
+-- | Check for global annotations, ignoring simple comments if there are any.
+checkGlobalAnnotations :: [C.Node] -> (Bool, [C.Node])
+checkGlobalAnnotations nodes = do
+  let (headerNodes, contentsNodes) = span isHeaderNode nodes
+      ignoreFile = any isIgnoreFile headerNodes
+  (ignoreFile, contentsNodes)
   where
-    isComment :: Node -> Bool
-    isComment = isJust . getCommentContent
+    isSimpleComment :: C.Node -> Bool
+    isSimpleComment node = do
+      let isComment = isJust $ getCommentContent node
+          isNotXrefcheckAnnotation = isNothing $ getXrefcheckContent node
+      isComment && isNotXrefcheckAnnotation
 
-    isIgnoreFile :: Node -> Bool
+    isIgnoreFile :: C.Node -> Bool
     isIgnoreFile = (Just (IgnoreAnnotation IMAll) ==) . getAnnotation
 
-defNode :: Node
-defNode = Node Nothing DOCUMENT [] -- hard-coded default Node
+    isHeaderNode :: C.Node -> Bool
+    isHeaderNode node =
+      any ($ node)
+        [ isSimpleComment
+        , isIgnoreFile
+        ]
+
+defNode :: C.Node
+defNode = C.Node Nothing DOCUMENT [] -- hard-coded default Node
 
 makeError
   :: Maybe PosInfo
@@ -375,17 +437,17 @@ makeError
   -> [ScanError]
 makeError pos fp errDescription = one $ ScanError (toPosition pos) fp errDescription
 
-getCommentContent :: Node -> Maybe Text
+getCommentContent :: C.Node -> Maybe Text
 getCommentContent node = do
   txt <- getHTMLText node
   T.stripSuffix "-->" =<< T.stripPrefix "<!--" (T.strip txt)
 
-getHTMLText :: Node -> Maybe Text
-getHTMLText (Node _ (HTML_BLOCK txt) _) = Just txt
-getHTMLText (Node _ (HTML_INLINE txt) _) = Just txt
+getHTMLText :: C.Node -> Maybe Text
+getHTMLText (C.Node _ (HTML_BLOCK txt) _) = Just txt
+getHTMLText (C.Node _ (HTML_INLINE txt) _) = Just txt
 getHTMLText _ = Nothing
 
-getXrefcheckContent :: Node -> Maybe Text
+getXrefcheckContent :: C.Node -> Maybe Text
 getXrefcheckContent node =
   let notStripped = T.stripPrefix "xrefcheck:" . T.strip =<<
         getCommentContent node
@@ -397,14 +459,14 @@ getXrefcheckContent node =
 -- As our annotations are always oneliners, we can fix this by simply setting
 -- end line equals to start line and calculating end column from start column
 -- and annotation length.
-getPosition :: Node -> Maybe PosInfo
-getPosition node@(Node pos _ _) = do
+getPosition :: C.Node -> Maybe PosInfo
+getPosition node@(C.Node pos _ _) = do
   annLength <- length . T.strip <$> getHTMLText node
   PosInfo sl sc _ _ <- pos
   pure $ PosInfo sl sc sl (sc + annLength - 1)
 
 -- | Extract `IgnoreMode` if current node is xrefcheck annotation.
-getAnnotation :: Node -> Maybe GetAnnotation
+getAnnotation :: C.Node -> Maybe GetAnnotation
 getAnnotation node = getXrefcheckContent node <&> textToMode
 
 textToMode :: Text -> GetAnnotation

From 82e7292bde1473178053c3c1d30ce4a9be8a75d1 Mon Sep 17 00:00:00 2001
From: Yuri Romanowski <yuri.romanowski@serokell.io>
Date: Thu, 15 Dec 2022 01:25:12 +0500
Subject: [PATCH 3/3] Review: rename GetAnnotation to Annotation, remove extra
 spaces

---
 src/Xrefcheck/Scanners/Markdown.hs | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/Xrefcheck/Scanners/Markdown.hs b/src/Xrefcheck/Scanners/Markdown.hs
index 8a053cbf..ce684b39 100644
--- a/src/Xrefcheck/Scanners/Markdown.hs
+++ b/src/Xrefcheck/Scanners/Markdown.hs
@@ -118,7 +118,7 @@ data Ignore = Ignore
   } deriving stock (Show)
 makeLensesFor [("_ignoreMode", "ignoreMode")] 'Ignore
 
-data GetAnnotation
+data Annotation
   = IgnoreAnnotation IgnoreMode
   | InvalidAnnotation Text
   deriving stock (Eq)
@@ -251,7 +251,7 @@ processAnnotations fp = withIgnoreMode . cataNodeWithParentNodeInfo process
       ScannerM C.Node
     traverseNodeWithLinkExpected ignoreLinkState modePos pos ty subs = do
       when (ignoreLinkState == ExpectingLinkInSubnodes) $
-        ssIgnore . _Just . ignoreMode .=  IMSLink ParentExpectsLink
+        ssIgnore . _Just . ignoreMode .= IMSLink ParentExpectsLink
       node' <- C.Node pos ty <$> sequence subs
       when (ignoreLinkState == ExpectingLinkInSubnodes) $ do
         currentIgnore <- use ssIgnore
@@ -265,10 +265,10 @@ processAnnotations fp = withIgnoreMode . cataNodeWithParentNodeInfo process
     handleAnnotation
       :: Maybe PosInfo
       -> NodeType
-      -> GetAnnotation
+      -> Annotation
       -> ScannerM C.Node
     handleAnnotation pos nodeType = \case
-      IgnoreAnnotation mode  -> do
+      IgnoreAnnotation mode -> do
         let reportIfThereWasAnnotation :: ScannerM ()
             reportIfThereWasAnnotation = do
               curIgnore <- use ssIgnore
@@ -465,11 +465,11 @@ getPosition node@(C.Node pos _ _) = do
   PosInfo sl sc _ _ <- pos
   pure $ PosInfo sl sc sl (sc + annLength - 1)
 
--- | Extract `IgnoreMode` if current node is xrefcheck annotation.
-getAnnotation :: C.Node -> Maybe GetAnnotation
+-- | Extract `Annotation` if current node is xrefcheck annotation.
+getAnnotation :: C.Node -> Maybe Annotation
 getAnnotation node = getXrefcheckContent node <&> textToMode
 
-textToMode :: Text -> GetAnnotation
+textToMode :: Text -> Annotation
 textToMode annText = case wordsList of
   ("ignore" : [x])
     | Just ignMode <- getIgnoreMode x -> IgnoreAnnotation ignMode