From 51ab2cf393e0f05d7f8df23ca69422bf2261e448 Mon Sep 17 00:00:00 2001 From: h7x4 Date: Wed, 30 Nov 2022 20:21:20 +0100 Subject: [PATCH] docbook2txt: halfway finish `` This required a lot of restructuring, which is why I won't put this on another branch. A part of the library which is updated in github but not yet at hackage, is needed to continue. Specifically, the `Monoid` instance in `Text.Layout.Table.Cell.Formatted` is needed to put together multiple colored text in a table. Among other things introduced, there is now our own `Formatted` type which is used to await formatting until things like tables come up. Compared to the earlier `IO ()` approach, this is probably a lot better. --- internals/docbook2txt/default.nix | 2 +- internals/docbook2txt/docbook2txt.hs | 195 +++++++++++++++++++++------ 2 files changed, 157 insertions(+), 40 deletions(-) diff --git a/internals/docbook2txt/default.nix b/internals/docbook2txt/default.nix index 53db948..b49b9bb 100644 --- a/internals/docbook2txt/default.nix +++ b/internals/docbook2txt/default.nix @@ -1,4 +1,4 @@ { pkgs, compiler ? "ghc924", ... }: pkgs.writers.writeHaskellBin "docbook2txt" { - libraries = with pkgs.haskellPackages; [ tagsoup ansi-terminal split text ]; + libraries = with pkgs.haskellPackages; [ tagsoup ansi-terminal split table-layout text ]; } (builtins.readFile ./docbook2txt.hs) diff --git a/internals/docbook2txt/docbook2txt.hs b/internals/docbook2txt/docbook2txt.hs index ac4ce71..b23032e 100644 --- a/internals/docbook2txt/docbook2txt.hs +++ b/internals/docbook2txt/docbook2txt.hs @@ -8,16 +8,93 @@ -- end up having to write custom conversion logic for every tag to be -- consumed by pandoc anyway. So instead, I am just planning on keeping -- my own module parsing raw xml tags (for now). - +{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternSynonyms #-} +import Data.Char (isSpace) import Data.List (find, intersperse) import Data.List.Split (splitOn) -import qualified System.Console.ANSI as AN +import Data.Maybe (fromMaybe) +import Data.String (IsString, fromString) +import qualified System.Console.ANSI.Codes as AN import qualified System.Console.ANSI.Types as AN import System.Environment (getArgs) import qualified Text.HTML.TagSoup as TS import qualified Text.HTML.TagSoup.Tree as TS +-- import qualified Text.Layout.Table as T +-- import qualified Text.Layout.Table.Cell as T +-- import qualified Text.Layout.Table.Cell.Formatted as T + +-- +-- Datatypes with relevant methods +-- + +-- TODO: Mark reflowable text, and do a reflow fold before print. + +-- data ShouldReflow a = SRReflow a +-- | SRConstant a +-- deriving (Show, Eq) + +data Formatted + = FSeveral [Formatted] + | FWrapped String Formatted String + | FPlain String + deriving (Eq) + +instance IsString Formatted where + fromString = FPlain + +instance Show Formatted where + show (FSeveral fs) = concatMap show fs + show (FWrapped w1 s w2) = concat [w1, show s, w2] + show (FPlain s) = s + +instance Semigroup Formatted where + FSeveral x <> FSeveral y = FSeveral $ x ++ y + f1 <> FSeveral x = FSeveral $ f1 : x + FSeveral x <> f1 = FSeveral $ x ++ [f1] + f1 <> f2 = FSeveral [f1, f2] + +instance Monoid Formatted where + mempty = FPlain mempty + +realString :: Formatted -> String +realString (FSeveral fs) = concatMap realString fs +realString (FWrapped w1 s w2) = realString s +realString (FPlain s) = s + +realLength :: Formatted -> Int +realLength = length . realString + +-- TODO: Revisit when table-layout gets a new release +-- https://github.com/muesli4/table-layout/issues/43 + +-- toTableFormattedType :: Formatted -> T.Formatted String +-- toTableFormattedType f = case f of +-- FSeveral fs -> mconcatMap toTableFormattedType fs +-- FWrapped w1 (FPlain s) w2 -> T.formatted w1 s w2 +-- FWrapped _ f1 _ -> toTableFormattedType f1 +-- FPlain s -> T.plain s + +data PotentiallyColorizedString = PCS + { colorized :: Formatted, + nonColorized :: String + } + deriving (Show, Eq) + +instance Semigroup PotentiallyColorizedString where + pcs1 <> pcs2 = + PCS + { colorized = colorized pcs1 <> colorized pcs2, + nonColorized = nonColorized pcs1 <> nonColorized pcs2 + } + +instance Monoid PotentiallyColorizedString where + mempty = + PCS + { colorized = mempty, + nonColorized = mempty + } main :: IO () main = do @@ -26,10 +103,32 @@ main = do let colorizedMode = "-C" `elem` args printTags colorizedMode $ map replaceTagColor $ removeParagraphTags $ TS.parseTree stdin -data PotentiallyColorizedString = PCS - { colorized :: IO (), - nonColorized :: String - } +-- Print a list of PCSs. +-- Depending on the first argument, the color can be optionally +-- colored. +printTags :: Bool -> [PotentiallyColorizedString] -> IO () +printTags False = putStrLn . unwords . map nonColorized +printTags True = putStrLn . mconcatMap (show . colorized) + +-- ANSI helpers + +wrapSGR :: AN.SGR -> String -> Formatted +wrapSGR sgr str = FWrapped (AN.setSGRCode [sgr]) (fromString str) (AN.setSGRCode [AN.Reset]) + +wrapColor :: AN.Color -> String -> Formatted +wrapColor = wrapSGR . AN.SetColor AN.Foreground AN.Vivid + +wrapTxt :: String -> String -> String +wrapTxt delimiter string = concat [delimiter, string, delimiter] + +wrapTxt' :: String -> Formatted -> Formatted +wrapTxt' delimiter string = FSeveral [fromString delimiter, string, fromString delimiter] + +bold :: AN.SGR +bold = AN.SetConsoleIntensity AN.BoldIntensity + +mconcatMap :: Monoid b => (a -> b) -> [a] -> b +mconcatMap f = mconcat . map f -- Remove `` tags. -- If there are more in one doc comment, the middle ones @@ -45,30 +144,6 @@ removeParagraphTags (TS.TagLeaf (TS.TagOpen "para" _) : rest) = removeParagraphT removeParagraphTags (x : y : rest) = x : removeParagraphTags (y : rest) removeParagraphTags x = x --- Print a list of PCSs. --- Depending on the first argument, the color can be optionally --- colored. -printTags :: Bool -> [PotentiallyColorizedString] -> IO () -printTags False = putStrLn . unwords . map nonColorized -printTags True = mapM_ colorized - --- ANSI helpers - -wrapSGR :: AN.SGR -> String -> IO () -wrapSGR sgr str = do - AN.setSGR [sgr] - putStr str - AN.setSGR [AN.Reset] - -wrapColor :: AN.Color -> String -> IO () -wrapColor c = wrapSGR (AN.SetColor AN.Foreground AN.Vivid c) - -wrapTxt :: String -> String -> String -wrapTxt delimiter string = concat [delimiter, string, delimiter] - -bold :: AN.SGR -bold = AN.SetConsoleIntensity AN.BoldIntensity - pattern TextLeaf a = TS.TagLeaf (TS.TagText a) -- Replace tags with their PCS string equivalent. @@ -76,12 +151,12 @@ replaceTagColor :: TS.TagTree String -> PotentiallyColorizedString replaceTagColor tagtree = case tagtree of TS.TagLeaf (TS.TagText s) -> PCS - { colorized = putStr s, + { colorized = fromString s, nonColorized = s } TS.TagBranch "para" _ inner -> PCS - { colorized = mapM_ (colorized . replaceTagColor) inner, + { colorized = mconcatMap (colorized . replaceTagColor) inner, nonColorized = concatMap (nonColorized . replaceTagColor) inner } TS.TagBranch "code" _ [TextLeaf content] -> @@ -136,12 +211,12 @@ replaceTagColor tagtree = case tagtree of } TS.TagBranch "quote" _ inner -> PCS - { colorized = sequence_ [putStr "\"", mapM_ (colorized . replaceTagColor) inner, putStr "\""], + { colorized = wrapTxt' "\"" $ mconcatMap (colorized . replaceTagColor) inner, nonColorized = wrapTxt "\"" $ concatMap (nonColorized . replaceTagColor) inner } TS.TagBranch "warning" _ inner -> PCS - { colorized = sequence_ [wrapColor AN.Red "WARNING: ", mapM_ (colorized . replaceTagColor) inner], + { colorized = mconcat [wrapColor AN.Red "WARNING: ", mconcatMap (colorized . replaceTagColor) inner], nonColorized = "WARNING: " ++ concatMap (nonColorized . replaceTagColor) inner } TS.TagBranch "xref" [("linkend", link)] [] -> @@ -149,15 +224,57 @@ replaceTagColor tagtree = case tagtree of removeOptPrefix ('o' : 'p' : 't' : '-' : rest) = rest removeOptPrefix x = x - replaceName :: String -> IO () - replaceName x = if x == "_name_" then wrapColor AN.Red "" else wrapSGR bold x + replaceName :: String -> Formatted + replaceName "_name_" = wrapColor AN.Red "" + replaceName x = wrapSGR bold x - formattedLink :: [IO ()] - formattedLink = intersperse (wrapSGR bold ".") $ map replaceName $ splitOn "." $ removeOptPrefix link + formattedLink :: Formatted + formattedLink = mconcat $ intersperse (wrapSGR bold ".") $ map replaceName $ splitOn "." $ removeOptPrefix link in PCS - { colorized = sequence_ $ [putStr "`"] ++ formattedLink ++ [putStr "`"], + { colorized = wrapTxt' "`" formattedLink, nonColorized = wrapTxt "`" link } + -- TS.TagBranch "informaltable" _ [inner] -> + -- let + -- extractRows :: TS.TagTree String -> Maybe [TS.TagTree String] + -- extractRows (TS.TagBranch "tgroup" _ [TS.TagBranch "tbody" _ rows]) = Just rows + -- extractRows _ = Nothing + + -- -- TODO: This filters too much + -- isWhiteSpace :: TS.TagTree String -> Bool + -- isWhiteSpace (TextLeaf s) = False + -- isWhiteSpace _ = True + + -- parseRow :: TS.TagTree String -> Maybe [PotentiallyColorizedString] + -- parseRow (TS.TagBranch "row" _ entries) = sequence $ map parseEntry $ filter isWhiteSpace entries + -- parseRow _ = Nothing + + -- parseEntry :: TS.TagTree String -> Maybe PotentiallyColorizedString + -- parseEntry (TS.TagBranch "entry" _ content) = Just $ mconcatMap replaceTagColor content + -- parseEntry _ = Nothing + + -- rawRows :: Maybe [[PotentiallyColorizedString]] + -- rawRows = do + -- rows <- extractRows inner + -- sequence $ map parseRow $ filter isWhiteSpace rows + + -- generateColSpec :: [[a]] -> [T.ColSpec] + -- generateColSpec rs = flip replicate T.def $ length $ rs !! 0 + + -- generateTableConfig :: T.Cell a => [[b]] -> [T.RowGroup a] -> String + -- generateTableConfig rs = (++) "\n" . T.tableString (generateColSpec rs) T.unicodeRoundS T.def + + -- table :: T.Cell a => (PotentiallyColorizedString -> a) -> Maybe String + -- table f = case rawRows of + -- Nothing -> Nothing + -- Just rrs -> Just $ generateTableConfig rrs $ map (T.rowG . map f) $ rrs + + -- errorMessage :: String + -- errorMessage = "ERROR: Could not parse "; + -- in PCS { + -- colorized = fromMaybe (wrapColor AN.Red errorMessage) (FPlain <$> table (toTableFormattedType . colorized)), + -- nonColorized = fromMaybe errorMessage $ table nonColorized + -- } TS.TagBranch "citerefentry" _ content -> let tagBranchTagMatches :: String -> TS.TagTree String -> Bool tagBranchTagMatches x (TS.TagBranch tag _ _) = tag == x