diff --git a/src/Xrefcheck/Core.hs b/src/Xrefcheck/Core.hs index 5a9d58c6..9c3ae450 100644 --- a/src/Xrefcheck/Core.hs +++ b/src/Xrefcheck/Core.hs @@ -15,9 +15,9 @@ import Control.Lens (makeLenses) import Data.Aeson (FromJSON (..), withText) import Data.Char (isAlphaNum) import Data.Char qualified as C -import Data.Default (Default (..)) import Data.DList (DList) import Data.DList qualified as DList +import Data.Default (Default (..)) import Data.List qualified as L import Data.Reflection (Given) import Data.Text qualified as T diff --git a/src/Xrefcheck/Scanners/Markdown.hs b/src/Xrefcheck/Scanners/Markdown.hs index 14f68275..ce684b39 100644 --- a/src/Xrefcheck/Scanners/Markdown.hs +++ b/src/Xrefcheck/Scanners/Markdown.hs @@ -19,14 +19,15 @@ module Xrefcheck.Scanners.Markdown import Universum -import CMarkGFM - (Node (..), NodeType (..), PosInfo (..), commonmarkToNode, extAutolink, optFootnotes) +import CMarkGFM (NodeType (..), PosInfo (..), commonmarkToNode, extAutolink, optFootnotes) +import CMarkGFM qualified as C import Control.Lens (_Just, makeLenses, makeLensesFor, (.=)) import Control.Monad.Trans.Writer.CPS (Writer, runWriter, tell) import Data.Aeson (FromJSON (..), genericParseJSON) import Data.ByteString.Lazy qualified as BSL import Data.DList qualified as DList import Data.Default (def) +import Data.List (span) import Data.Text qualified as T import Data.Text.Lazy qualified as LT import Fmt (Buildable (..), nameF) @@ -49,8 +50,8 @@ defGithubMdConfig = MarkdownConfig { mcFlavor = GitHub } -instance Buildable Node where - build (Node _mpos ty mSubs) = nameF (show ty) $ +instance Buildable C.Node where + build (C.Node _mpos ty mSubs) = nameF (show ty) $ maybe "[]" interpolateBlockListF (nonEmpty mSubs) toPosition :: Maybe PosInfo -> Position @@ -67,7 +68,7 @@ toPosition = Position . \case |] -- | Extract text from the topmost node. -nodeExtractText :: Node -> Text +nodeExtractText :: (C.Node) -> Text nodeExtractText = T.strip . mconcat . map extractText . nodeFlatten where extractText = \case @@ -75,8 +76,8 @@ nodeExtractText = T.strip . mconcat . map extractText . nodeFlatten CODE t -> t _ -> "" - nodeFlatten :: Node -> [NodeType] - nodeFlatten (Node _pos ty subs) = ty : concatMap nodeFlatten subs + nodeFlatten :: (C.Node) -> [NodeType] + nodeFlatten (C.Node _pos ty subs) = ty : concatMap nodeFlatten subs data IgnoreMode @@ -102,26 +103,24 @@ data IgnoreLinkState -- and we should change `IgnoreLinkState`, because it's not a problem if -- our node's first child doesn't contain a link. So this status means that -- we won't throw errors if we don't find a link for now - deriving stock (Eq) + deriving stock (Eq, Show) data IgnoreModeState = IMSLink IgnoreLinkState | IMSParagraph - | IMSAll - deriving stock (Eq) + deriving stock (Eq, Show) -- | Bind `IgnoreMode` to its `PosInfo` so that we can tell where the -- corresponding annotation was declared. data Ignore = Ignore { _ignoreMode :: IgnoreModeState , _ignorePos :: Maybe PosInfo - } + } deriving stock (Show) makeLensesFor [("_ignoreMode", "ignoreMode")] 'Ignore -data GetIgnoreMode - = NotAnAnnotation - | ValidMode IgnoreMode - | InvalidMode Text +data Annotation + = IgnoreAnnotation IgnoreMode + | InvalidAnnotation Text deriving stock (Eq) @@ -142,13 +141,13 @@ initialScannerState = ScannerState type ScannerM a = StateT ScannerState (Writer [ScanError]) a -- | A fold over a `Node`. -cataNode :: (Maybe PosInfo -> NodeType -> [c] -> c) -> Node -> c -cataNode f (Node pos ty subs) = f pos ty (cataNode f <$> subs) +cataNode :: (Maybe PosInfo -> NodeType -> [c] -> c) -> C.Node -> c +cataNode f (C.Node pos ty subs) = f pos ty (cataNode f <$> subs) --- | Sets correct @_ssParentNodeType@ before running scanner on each node +-- | Sets correct @_ssParentNodeType@ before running scanner on each node. cataNodeWithParentNodeInfo :: (Maybe PosInfo -> NodeType -> [ScannerM a] -> ScannerM a) - -> Node + -> C.Node -> ScannerM a cataNodeWithParentNodeInfo f node = cataNode f' node where @@ -156,91 +155,156 @@ cataNodeWithParentNodeInfo f node = cataNode f' node map (ssParentNodeType .= Just ty >>) childScanners -- | Find ignore annotations (ignore paragraph and ignore link) --- and remove nodes that should be ignored -removeIgnored :: FilePath -> Node -> Writer [ScanError] Node -removeIgnored fp = withIgnoreMode . cataNodeWithParentNodeInfo remove +-- and remove nodes that should be ignored. +processAnnotations :: FilePath -> C.Node -> Writer [ScanError] C.Node +processAnnotations fp = withIgnoreMode . cataNodeWithParentNodeInfo process where - remove + process :: Maybe PosInfo -> NodeType - -> [ScannerM Node] - -> ScannerM Node - remove pos ty subs = do - let node = Node pos ty [] - scan <- use ssIgnore >>= \case + -> [ScannerM C.Node] + -> ScannerM C.Node + process pos ty subs = do + let node = C.Node pos ty [] + use ssIgnore >>= \ign -> do -- When no `Ignore` state is set check next node for annotation, -- if found then set it as new `IgnoreMode` otherwise skip node. - Nothing -> handleIgnoreMode pos ty subs $ getIgnoreMode node - Just (Ignore mode modePos) -> - case (mode, ty) of - -- We expect to find a paragraph immediately after the - -- `ignore paragraph` annotanion. If the paragraph is not - -- found we should report an error. - (IMSParagraph, PARAGRAPH) -> (ssIgnore .= Nothing) $> defNode - (IMSParagraph, x) -> do - lift . tell . makeError modePos fp . ParagraphErr $ prettyType x - ssIgnore .= Nothing - Node pos ty <$> sequence subs - - -- We don't expect to find an `ignore all` annotation here, - -- since that annotation should be at the top of the file and - -- the file should already be ignored when `checkIgnoreFile` is called. - -- We should report an error if we find it anyway. - (IMSAll, _) -> do - lift . tell $ makeError modePos fp FileErr - ssIgnore .= Nothing - Node pos ty <$> sequence subs - - (IMSLink _, LINK {}) -> do - ssIgnore .= Nothing - return defNode - (IMSLink _, IMAGE {}) -> do - ssIgnore .= Nothing - return defNode - (IMSLink ignoreLinkState, _) -> do - when (ignoreLinkState == ExpectingLinkInSubnodes) $ - ssIgnore . _Just . ignoreMode .= IMSLink ParentExpectsLink - node' <- Node pos ty <$> sequence subs - when (ignoreLinkState == ExpectingLinkInSubnodes) $ do - currentIgnore <- use ssIgnore - case currentIgnore of - Just (Ignore {_ignoreMode = IMSLink ParentExpectsLink}) -> do - lift $ tell $ makeError modePos fp LinkErr - ssIgnore .= Nothing - _ -> pass - return node' - - when (ty == PARAGRAPH) $ use ssIgnore >>= \case + let mbAnnotation = getAnnotation node + case mbAnnotation of + Just ann -> handleAnnotation pos ty ann + Nothing -> do + case ty of + PARAGRAPH -> handleParagraph ign pos ty subs + LINK {} -> handleLink ign pos ty subs + IMAGE {} -> handleLink ign pos ty subs + _ -> handleOther ign pos ty subs + + handleLink :: + Maybe Ignore -> + Maybe PosInfo -> + NodeType -> + [ScannerM C.Node] -> + ScannerM C.Node + handleLink ign pos ty subs = do + let traverseChildren = C.Node pos ty <$> sequence subs + -- It can be checked that it's correct for all the cases + ssIgnore .= Nothing + + case ign of + Nothing -> traverseChildren + Just (Ignore IMSParagraph modePos) -> do + reportExpectedParagraphAfterIgnoreAnnotation modePos ty + traverseChildren + Just (Ignore (IMSLink _) _) -> do + pure defNode + + handleParagraph :: + Maybe Ignore -> + Maybe PosInfo -> + NodeType -> + [ScannerM C.Node] -> + ScannerM C.Node + handleParagraph ign pos ty subs = do + let traverseChildren = C.Node pos ty <$> sequence subs + node <- case ign of + Nothing -> traverseChildren + Just (Ignore IMSParagraph _) -> do + ssIgnore .= Nothing + pure defNode + Just (Ignore (IMSLink ignoreLinkState) modePos) -> + traverseNodeWithLinkExpected ignoreLinkState modePos pos ty subs + + use ssIgnore >>= \case Just (Ignore (IMSLink ExpectingLinkInParagraph) pragmaPos) -> lift $ tell $ makeError pragmaPos fp LinkErr _ -> pass - - return scan - - handleIgnoreMode + pure node + + handleOther :: + Maybe Ignore -> + Maybe PosInfo -> + NodeType -> + [ScannerM C.Node] -> + ScannerM C.Node + handleOther ign pos ty subs = do + let traverseChildren = C.Node pos ty <$> sequence subs + + case ign of + Nothing -> traverseChildren + Just (Ignore IMSParagraph modePos) -> do + reportExpectedParagraphAfterIgnoreAnnotation modePos ty + ssIgnore .= Nothing + traverseChildren + Just (Ignore (IMSLink ignoreLinkState) modePos) -> do + traverseNodeWithLinkExpected ignoreLinkState modePos pos ty subs + + reportExpectedParagraphAfterIgnoreAnnotation :: Maybe PosInfo -> NodeType -> ScannerM () + reportExpectedParagraphAfterIgnoreAnnotation modePos ty = + lift . tell . makeError modePos fp . ParagraphErr $ prettyType ty + + traverseNodeWithLinkExpected :: + IgnoreLinkState -> + Maybe PosInfo -> + Maybe PosInfo -> + NodeType -> + [ScannerM C.Node] -> + ScannerM C.Node + traverseNodeWithLinkExpected ignoreLinkState modePos pos ty subs = do + when (ignoreLinkState == ExpectingLinkInSubnodes) $ + ssIgnore . _Just . ignoreMode .= IMSLink ParentExpectsLink + node' <- C.Node pos ty <$> sequence subs + when (ignoreLinkState == ExpectingLinkInSubnodes) $ do + currentIgnore <- use ssIgnore + case currentIgnore of + Just (Ignore {_ignoreMode = IMSLink ParentExpectsLink}) -> do + lift $ tell $ makeError modePos fp LinkErr + ssIgnore .= Nothing + _ -> pass + return node' + + handleAnnotation :: Maybe PosInfo -> NodeType - -> [ScannerM Node] - -> GetIgnoreMode - -> ScannerM Node - handleIgnoreMode pos nodeType subs = \case - ValidMode mode -> do - ignoreModeState <- case mode of - IMLink -> use ssParentNodeType <&> IMSLink . \case - Just PARAGRAPH -> ExpectingLinkInParagraph - _ -> ExpectingLinkInSubnodes - - IMParagraph -> pure IMSParagraph - - IMAll -> pure IMSAll - - (ssIgnore .= Just (Ignore ignoreModeState correctPos)) $> defNode - InvalidMode msg -> do + -> Annotation + -> ScannerM C.Node + handleAnnotation pos nodeType = \case + IgnoreAnnotation mode -> do + let reportIfThereWasAnnotation :: ScannerM () + reportIfThereWasAnnotation = do + curIgnore <- use ssIgnore + whenJust curIgnore $ \case + Ignore IMSParagraph prevPos -> + lift . tell . makeError prevPos fp . ParagraphErr $ prettyType nodeType + Ignore (IMSLink _) prevPos -> + lift $ tell $ makeError prevPos fp LinkErr + + mbIgnoreModeState <- case mode of + IMLink -> do + reportIfThereWasAnnotation + use ssParentNodeType <&> Just . IMSLink . \case + Just PARAGRAPH -> ExpectingLinkInParagraph + _ -> ExpectingLinkInSubnodes + + IMParagraph -> do + reportIfThereWasAnnotation + pure $ Just IMSParagraph + + -- We don't expect to find an `ignore all` annotation here, + -- since that annotation should be at the top of the file and + -- the file should already be ignored when `checkIgnoreFile` is called. + -- We should report an error if we find it anyway. + IMAll -> do + lift . tell $ makeError correctPos fp FileErr + pure Nothing + + whenJust mbIgnoreModeState $ \ignoreModeState -> + (ssIgnore .= Just (Ignore ignoreModeState correctPos)) + pure defNode + InvalidAnnotation msg -> do lift . tell $ makeError correctPos fp $ UnrecognisedErr msg - (ssIgnore .= Nothing) $> defNode - NotAnAnnotation -> Node pos nodeType <$> sequence subs + pure defNode where - correctPos = getPosition $ Node pos nodeType [] + correctPos = getPosition $ C.Node pos nodeType [] prettyType :: NodeType -> Text prettyType ty = @@ -248,8 +312,8 @@ removeIgnored fp = withIgnoreMode . cataNodeWithParentNodeInfo remove in fromMaybe "" mType withIgnoreMode - :: ScannerM Node - -> Writer [ScanError] Node + :: ScannerM C.Node + -> Writer [ScanError] C.Node withIgnoreMode action = action `runStateT` initialScannerState >>= \case -- We expect `Ignore` state to be `Nothing` when we reach EOF, -- otherwise that means there was an annotation that didn't match @@ -261,14 +325,11 @@ removeIgnored fp = withIgnoreMode . cataNodeWithParentNodeInfo remove IMSLink _ -> do tell $ makeError pos fp LinkErr pure node - IMSAll -> do - tell $ makeError pos fp FileErr - pure node (node, _) -> pure node -- | Custom `foldMap` for source tree. -foldNode :: (Monoid a, Monad m) => (Node -> m a) -> Node -> m a -foldNode action node@(Node _ _ subs) = do +foldNode :: (Monoid a, Monad m) => (C.Node -> m a) -> C.Node -> m a +foldNode action node@(C.Node _ _ subs) = do a <- action node b <- concatForM subs (foldNode action) return (a <> b) @@ -278,16 +339,19 @@ type ExtractorM a = ReaderT MarkdownConfig (Writer [ScanError]) a -- | Extract information from source tree. nodeExtractInfo :: FilePath - -> Node + -> C.Node -> ExtractorM FileInfo -nodeExtractInfo fp input@(Node _ _ nSubs) = do - if checkIgnoreAllFile nSubs +nodeExtractInfo fp (C.Node nPos nTy nSubs) = do + let (ignoreFile, contentNodes) = checkGlobalAnnotations nSubs + if ignoreFile then return def - else diffToFileInfo <$> (foldNode extractor =<< lift (removeIgnored fp input)) + else diffToFileInfo <$> + (lift (processAnnotations fp $ C.Node nPos nTy contentNodes) + >>= foldNode extractor) where - extractor :: Node -> ExtractorM FileInfoDiff - extractor node@(Node pos ty _) = + extractor :: C.Node -> ExtractorM FileInfoDiff + extractor node@(C.Node pos ty _) = case ty of HTML_BLOCK _ -> do return mempty @@ -340,24 +404,31 @@ nodeExtractInfo fp input@(Node _ _ nSubs) = do (DList.singleton $ Reference {rName, rPos, rLink, rAnchor}) DList.empty --- | Check if there is `ignore all` at the beginning of the file, --- ignoring preceding comments if there are any. -checkIgnoreAllFile :: [Node] -> Bool -checkIgnoreAllFile nodes = - let isSimpleComment :: Node -> Bool - isSimpleComment node = isComment node && not (isIgnoreFile node) - - mIgnoreFile = safeHead $ dropWhile isSimpleComment nodes - in maybe False isIgnoreFile mIgnoreFile +-- | Check for global annotations, ignoring simple comments if there are any. +checkGlobalAnnotations :: [C.Node] -> (Bool, [C.Node]) +checkGlobalAnnotations nodes = do + let (headerNodes, contentsNodes) = span isHeaderNode nodes + ignoreFile = any isIgnoreFile headerNodes + (ignoreFile, contentsNodes) where - isComment :: Node -> Bool - isComment = isJust . getCommentContent + isSimpleComment :: C.Node -> Bool + isSimpleComment node = do + let isComment = isJust $ getCommentContent node + isNotXrefcheckAnnotation = isNothing $ getXrefcheckContent node + isComment && isNotXrefcheckAnnotation + + isIgnoreFile :: C.Node -> Bool + isIgnoreFile = (Just (IgnoreAnnotation IMAll) ==) . getAnnotation - isIgnoreFile :: Node -> Bool - isIgnoreFile = (ValidMode IMAll ==) . getIgnoreMode + isHeaderNode :: C.Node -> Bool + isHeaderNode node = + any ($ node) + [ isSimpleComment + , isIgnoreFile + ] -defNode :: Node -defNode = Node Nothing DOCUMENT [] -- hard-coded default Node +defNode :: C.Node +defNode = C.Node Nothing DOCUMENT [] -- hard-coded default Node makeError :: Maybe PosInfo @@ -366,17 +437,17 @@ makeError -> [ScanError] makeError pos fp errDescription = one $ ScanError (toPosition pos) fp errDescription -getCommentContent :: Node -> Maybe Text +getCommentContent :: C.Node -> Maybe Text getCommentContent node = do txt <- getHTMLText node T.stripSuffix "-->" =<< T.stripPrefix "