Skip to content

Commit

Permalink
Fixed memory usage issue. Document ordering not perfect.
Browse files Browse the repository at this point in the history
  • Loading branch information
Tass0sm committed Mar 14, 2021
1 parent c4f3e74 commit def8176
Show file tree
Hide file tree
Showing 7 changed files with 70 additions and 78 deletions.
11 changes: 3 additions & 8 deletions src-exe/Main.hs
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,11 @@ main :: IO ()
main = do
arg <- head <$> getArgs
graph <- analyzeHTML arg
putStrLn "Finished Analysis"
let newGraph = convertToTexinfo graph
putStrLn "Finished Conversion"
outputTexinfo "./out.texi" newGraph
--let sGraph = gmap show newGraph
--putStrLn $ exportAsIs sGraph

--putStrLn $ exportAsIs graph
--mapM_ (putStrLn . show) pairs

-- files <- listDirectoryRecursively "./src"
-- mapM_ putStrLn files
putStrLn "Finished Output"

-- (subcommand:args) <- getArgs
-- case subcommand of
Expand Down
34 changes: 23 additions & 11 deletions src/Graphdoc/Analysis/HTML.hs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ import Graphdoc.Definition
import Graphdoc.Analysis.Util
import Graphdoc.Analysis.HTML.Links

import System.IO
import qualified Data.Text.IO as TIO
import qualified Data.Map as Map

import Text.Pandoc
import Algebra.Graph.Labelled.AdjacencyMap
Expand Down Expand Up @@ -35,9 +37,8 @@ getFileEdges file = do
newRef <- resolveLink file ref
return (rel, file, newRef)

getEveryFileEdge :: FilePath -> IO [(String, FilePath, FilePath)]
getEveryFileEdge topdir = do
files <- getSourceFiles sourceP topdir
getEveryFileEdge :: [FilePath] -> IO [(String, FilePath, FilePath)]
getEveryFileEdge files = do
concat <$> mapM getFileEdges files

---------------------------------------------------------------------------
Expand All @@ -50,21 +51,32 @@ getMetadata file = DocMeta { docMetaFormat = "HTML"
}

getVertex :: FilePath -> IO DocNode
getVertex file = do
text <- TIO.readFile file
let result = runPure $ readHtml def text
doc <- handleError result
return $ DocNode (getMetadata file) (Doc doc)
getVertex file = withFile file ReadMode
(\_ -> do
return $ getMetadata file)

toDocEdge :: (String, FilePath, FilePath) -> IO (String, DocNode, DocNode)
toDocEdge (s, f1, f2) = do
v1 <- getVertex f1
v2 <- getVertex f2
return (s, v1, v2)

getTableEntry :: FilePath -> IO (FilePath, DocSource)
getTableEntry file = withFile file ReadMode
(\h -> do
text <- TIO.hGetContents h
let result = runPure $! readHtml def text
doc <- handleError result
return (file, Doc doc))

analyzeHTML :: FilePath -> IO DocGraph
analyzeHTML topdir = do
fileEdgeList <- getEveryFileEdge topdir
docEdgeList <- mapM toDocEdge fileEdgeList
return $ edges docEdgeList
files <- getSourceFiles sourceP topdir
docMap <- Map.fromList <$> mapM getTableEntry files

fileEdgeList <- getEveryFileEdge files
return (docMap, edges $ fileEdgeList)




11 changes: 8 additions & 3 deletions src/Graphdoc/Conversion/Texinfo.hs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ module Graphdoc.Conversion.Texinfo
( convertToTexinfo ) where

import Data.Maybe
import qualified Data.Map as Map

import Graphdoc.Definition
import Graphdoc.Conversion.Util
Expand All @@ -12,6 +13,8 @@ import Text.Pandoc
import Text.Pandoc.Walk
import System.FilePath

import Algebra.Graph.Labelled.AdjacencyMap

-- Will switch to lenses
convertMetadata :: DocMeta -> DocMeta
convertMetadata m@(DocMeta _ p _) =
Expand All @@ -27,6 +30,8 @@ convertPandoc = walk removeHeader
removeHeader x = x

convertToTexinfo :: DocGraph -> DocGraph
convertToTexinfo g =
let nodeConverter = liftConverter convertMetadata convertPandoc
in mapDocGraph nodeConverter g
convertToTexinfo (docMap, docGraph) =
let nodeConverter = liftConverter convertPandoc
newMap = Map.map nodeConverter docMap
newGraph = gmap (-<.> "texi") docGraph
in (newMap, docGraph)
17 changes: 5 additions & 12 deletions src/Graphdoc/Conversion/Util.hs
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
{-# LANGUAGE OverloadedStrings #-}

module Graphdoc.Conversion.Util
( liftConverter
, mapDocGraph ) where
( liftConverter ) where

import qualified Data.Text.IO as TIO
import Data.Text
Expand All @@ -13,13 +12,7 @@ import Text.Pandoc
import Algebra.Graph.Labelled.AdjacencyMap

-- Will switch to lenses
liftConverter :: (DocMeta -> DocMeta)
-> (Pandoc -> Pandoc)
-> (DocNode -> DocNode)
liftConverter metaF contentF =
\(DocNode m (Doc p)) -> DocNode (metaF m) (Doc $ contentF p)

mapDocGraph :: (DocNode -> DocNode) -> DocGraph -> DocGraph
mapDocGraph converter src = gmap converter src


liftConverter :: (Pandoc -> Pandoc)
-> (DocSource -> DocSource)
liftConverter contentF =
\(Doc p) -> (Doc $ contentF p)
27 changes: 12 additions & 15 deletions src/Graphdoc/Definition.hs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ module Graphdoc.Definition
import Algebra.Graph.Labelled.AdjacencyMap
import Text.Pandoc.Definition
import Data.Text (Text)
import Data.Map

-- All the information for a node in the graph (metadata and source).
data DocMeta = DocMeta
Expand All @@ -16,25 +17,21 @@ data DocMeta = DocMeta
, docMetaIsTop :: Bool
} deriving (Show)

data DocSource = Body Text |
Doc Pandoc

-- The node type
data DocNode = DocNode DocMeta DocSource

instance Eq DocNode where
(==) (DocNode m1 _) (DocNode m2 _) =
(docMetaPath m1) == (docMetaPath m2)
instance Eq DocMeta where
(==) (DocMeta _ p1 _) (DocMeta _ p2 _) =
p1 == p2

instance Ord DocNode where
compare (DocNode m1 _) (DocNode m2 _) =
compare (docMetaPath m1) (docMetaPath m2)
instance Ord DocMeta where
compare (DocMeta _ p1 _) (DocMeta _ p2 _) =
compare p1 p2

instance Show DocNode where
show (DocNode m1 _) = docMetaPath m1
type DocNode = DocMeta

-- A standalone edge, for building graphs
type DocEdge = (String, DocNode, DocNode)

data DocSource = Body Text |
Doc Pandoc

-- A labelled graph of nodes, which captures a body of documentation.
type DocGraph = AdjacencyMap String DocNode
type DocGraph = (Map FilePath DocSource, AdjacencyMap String FilePath)
39 changes: 19 additions & 20 deletions src/Graphdoc/Output/Texinfo.hs
Original file line number Diff line number Diff line change
Expand Up @@ -12,35 +12,37 @@ import Text.Pandoc.Writers
import Text.Pandoc.Builder

import System.IO
import System.FilePath
import Data.Maybe
import Data.Either
import qualified Data.Text as T
import qualified Data.Text.IO as TIO
import qualified Data.Map as Map

import Algebra.Graph.Export.Dot
import Algebra.Graph.Labelled.AdjacencyMap
import Algebra.Graph.AdjacencyMap.Algorithm
import qualified Algebra.Graph.AdjacencyMap as Unlabelled

findVertexWithPath :: String -> DocGraph -> DocNode
findVertexWithPath path graph =
let vertices = vertexList graph
findVertexWithBaseName :: String -> DocGraph -> String
findVertexWithBaseName path (_, docGraph) =
let vertices = vertexList docGraph
-- UNSAFE!!!
in head $ filter hasThePath vertices
where hasThePath = (path ==) . show
in head $ filter hasTheBaseName vertices
where hasTheBaseName = (path ==) . takeBaseName

simplifyGraph :: DocGraph -> Unlabelled.AdjacencyMap DocNode
simplifyGraph graph =
simplifiedGraph :: DocGraph -> Unlabelled.AdjacencyMap FilePath
simplifiedGraph (_, docGraph) =
let isTopEdge = ("TOP" == ) . (\(s, _, _) -> s)
labelledTopEdges = filter isTopEdge $ edgeList graph
labelledTopEdges = filter isTopEdge $ edgeList docGraph
unlabelledTopEdges = map (\(_, a, b) -> (b, a)) labelledTopEdges
in Unlabelled.edges unlabelledTopEdges

flattenGraph :: DocGraph -> [(Int, DocNode)]
flattenGraph :: DocGraph -> [(Int, FilePath)]
flattenGraph graph =
let rootNode = findVertexWithPath "/home/tassos/desktop/sample-doc/FrontMatter/index.texi" graph
simplifiedGraph = simplifyGraph graph
in dfsWithDepth [rootNode] simplifiedGraph
let rootNode = findVertexWithBaseName "index" graph
simpleGraph = simplifiedGraph graph
in dfsWithDepth [rootNode] simpleGraph

writer :: Writer PandocPure
writer = fromJust $ lookup "texinfo" writers
Expand All @@ -53,23 +55,20 @@ simpleConverter :: Pandoc -> T.Text
simpleConverter = makeConverter writer

outputTexinfo :: String -> DocGraph -> IO ()
outputTexinfo destination graph =
outputTexinfo destination graph@(docMap, _) =
let nodeList = flattenGraph graph
getDocWithDepth = \(d, (DocNode _ (Doc p))) -> (d, p) -- Replace with lenses?
getDocWithDepth = \(d, f) -> (d, fromJust $ Map.lookup f docMap) -- Replace with lenses?
docWithDepthList = map getDocWithDepth nodeList
prependHeader = \(d, Pandoc m bs) ->
prependHeader = \(d, (Doc (Pandoc m bs))) ->
let documentHeader = header d $ fromList $ docTitle m
existingBlocks = fromList bs
wholeBlocks = toList $ documentHeader <> existingBlocks
in Pandoc m wholeBlocks
docList = map prependHeader docWithDepthList
totalDoc = mconcat docList
docText = simpleConverter totalDoc
in do
putStrLn $ show nodeList
putStrLn $ show totalDoc
withFile destination WriteMode
(\h -> TIO.hPutStr h docText)
in withFile destination WriteMode
(\h -> TIO.hPutStr h docText)



Expand Down
9 changes: 0 additions & 9 deletions src/Graphdoc/Output/Util.hs
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,3 @@ flattenWithDepth t = squish 1 t []

dfsWithDepth :: Ord a => [a] -> AdjacencyMap a -> [(Int, a)]
dfsWithDepth vs = dfsForestFrom vs >=> flattenWithDepth

-- docNodeContent :: DocNode -> Text
-- docNodeContent (DocNode _ (Body t)) = t
-- docNodeContent _ = ""
--
-- hPutDocNodes :: Handle -> [DocNode] -> IO ()
-- hPutDocNodes handle nodeList =
-- mapM_ (TIO.hPutStr handle . docNodeContent) nodeList
--

0 comments on commit def8176

Please sign in to comment.