-- |
-- Module      :  HsIndex.Parser
-- Copyright   :  Jean-Luc JOULIN 2018-2020
-- License     :  General Public Licence (GPLv3)
-- Maintainer  :  Jean-Luc JOULIN  <jean-luc-joulin@orange.fr>
-- Stability   :  alpha
-- Portability :  portable
-- The parsing functions for the hsindex program.



module HsIndex.Parser
  (
  -- * Parsing the index file


  -- * Parsing the style file


  -- * Parsing the custom language definition file
  --
  -- ** Description of a custom file
  --
  -- A custom language can be defined with a specific file.
  -- This file must respect the following format and contain in any order :
  --
  -- 1. A mandatory ordered list of letters describing the alphabet.
  --
  --        > LETTERS
  --        > abcdefghijkl
  --
  -- 2. An optional ordered list of numbers.
  --
  --        > NUMBERS
  --        > 0123456789
  --
  -- 3. An optional ordered list of symbols.
  --
  --        > SYMBOLS
  --        > 0123456789
  --
  -- 4. An optional substitution list. This list describe the character substitutions
  --    to perform before sorting the words.
  --
  --        > SUBSTITUTIONS
  --        > œ->oe
  --        > à->a
  --        > ê->e
  --
  -- ** example of custom file
  --
  --
    styleBasic
  , styleDoubleHeading
  , parseStyleFile
  , parseLanguageFile
  , parseIndexFile
  , emptyDef
  ) where


import           Data.Char
import           Data.Functor.Identity
import           Data.Functor
import           HsIndex.CharLists.French
import           HsIndex.CharLists.Russian
import           HsIndex.CharLists.SubsIEC
import           HsIndex.CharLists.Symbols
import           HsIndex.Functions
                            ( literalNewLine
                            -- ~ , upperLower
                            -- ~ , replaUpperLower
                            , sepArobase
                            )
import           HsIndex.Types
import qualified Data.Text                     as T
import           Text.Parsec
import           Text.Parsec.Perm
import           Text.Parsec.Text



-- | The default 'IndexStyle' applied to an index.
--
-- This default style have :
--
-- * Basic preamble and postamble
--
-- * Uppercase Layer 0 heading
--
-- * No Layer 1 heading
--
styleBasic = IndexStyle
  { idxPreamble     = T.pack "\\begin{theindex}\n"
  , idxPostamble    = T.pack "\n\n\\end{theindex}\n"
  , idxHeadingFlag0 = UpperCase
  , idxHeadingFlag1 = None
  , idxHeadingPreL0 = T.pack "{\\vspace{1.5cm}\\huge{\\textbf{"
  , idxHeadingSufL0 = T.pack "}}\\hfill}\\nopagebreak\n"
  , idxHeadingPreL1 = T.pack "" -- TODO Remplacer par empty
  , idxHeadingSufL1 = T.pack ""
  , idxSymhead      = T.pack "Symbols"
  , idxNumhead      = T.pack "Numbers"
  , idxGroupSkip0   = T.pack "\n \\indexspace\n" -- "\n\n \\indexspace\n"
  , idxGroupSkip1   = T.pack ""
  , idxItem0        = T.pack "\n \\item "
  , idxItem1        = T.pack "\n \\subitem "
  , idxItem2        = T.pack "\n \\subsubitem "
  , idxItem01       = T.pack "\n \\subitem "
  , idxItem12       = T.pack "\n \\subsubitem "
  , idxDelim0       = T.pack ", " -- \\hfill
  , idxDelim1       = T.pack ", "
  , idxDelim2       = T.pack ", "
  , idxDelimn       = T.pack ", "
  , idxDelimr       = T.pack "--"
  , idxEncapPre     = T.pack "{"
  , idxEncapSuf     = T.pack "}"
  }


-- | Another 'IndexStyle' applied to an index.
--
-- This default style have :
--
-- * Basic preamble and postamble
--
-- * Uppercase Layer 0 headings
--
-- * Uppercase Layer 1 headings
--
styleDoubleHeading = IndexStyle
  { idxPreamble     = T.pack "\\begin{theindex}\n"
  , idxPostamble    = T.pack "\n\n\\end{theindex}\n"
  , idxHeadingFlag0 = UpperCase
  , idxHeadingFlag1 = UpperCase
  , idxHeadingPreL0 = T.pack "{\\vspace{1.5cm}\\huge{\\textbf{"
  , idxHeadingSufL0 = T.pack "}}\\hfill}\\nopagebreak\n\n"
  , idxHeadingPreL1 = T.pack "\n{\\vspace{0.5cm}\\large{\\textbf{"
  , idxHeadingSufL1 = T.pack "}}\\hfill}\\nopagebreak"
  , idxSymhead      = T.pack "Symbols"
  , idxNumhead      = T.pack "Numbers"
  , idxGroupSkip0   = T.pack "\n\n \\indexspace\n"
  , idxGroupSkip1   = T.pack "\n\n \\indexspace\n"
  , idxItem0        = T.pack "\n \\item "
  , idxItem1        = T.pack "\n \\subitem "
  , idxItem2        = T.pack "\n \\subsubitem "
  , idxItem01       = T.pack "\n \\subitem "
  , idxItem12       = T.pack "\n \\subsubitem "
  , idxDelim0       = T.pack ", " -- \\hfill
  , idxDelim1       = T.pack ", "
  , idxDelim2       = T.pack ", "
  , idxDelimn       = T.pack ", "
  , idxDelimr       = T.pack "--"
  , idxEncapPre     = T.pack "{"
  , idxEncapSuf     = T.pack "}"
  }


-- | Parse a style file
--
-- A style file can contain several optional keywords definition to set the design
-- of an index.
-- 
--  Keywords can be :
--
-- [preamble] To set the beginning of the index.
--
--
-- [postamble] To set the end of the index.
--
parseStyleFile :: IndexStyle                            -- ^ The default 'Style' to use.
               -> ParsecT String () Identity IndexStyle -- ^ The new 'Style' parsed.
parseStyleFile sty = do
  emptyLines -- Possibles emptylines at the beginning of the file
  sty <- permute
    (    IndexStyle -- all possible permutations
    <$?> (idxPreamble sty    , try $ parseStyleDef "preamble") -- Parse the preamble
    <|?> (idxPostamble sty   , try $ parseStyleDef "postamble")
    <|?> (idxHeadingFlag0 sty, try $ parseStyleDefHead "headings_flag")
    <|?> (idxHeadingFlag1 sty, try $ parseStyleDefHead "headings_flag1")
    <|?> (idxHeadingPreL0 sty, try $ parseStyleDef "heading_prefix")
    <|?> (idxHeadingSufL0 sty, try $ parseStyleDef "heading_suffix")
    <|?> (idxHeadingPreL1 sty, try $ parseStyleDef "heading_prefix1")
    <|?> (idxHeadingSufL1 sty, try $ parseStyleDef "heading_suffix1")
    <|?> (idxSymhead sty     , try $ parseStyleDef "symhead_positive")
    <|?> (idxNumhead sty     , try $ parseStyleDef "numhead_positive")
    <|?> (idxGroupSkip0 sty  , try $ parseStyleDef "group_skip")
    <|?> (idxGroupSkip1 sty  , try $ parseStyleDef "group_skip1")
    <|?> (idxItem0 sty       , try $ parseStyleDef "item_0")
    <|?> (idxItem1 sty       , try $ parseStyleDef "item_1")
    <|?> (idxItem2 sty       , try $ parseStyleDef "item_2")
    <|?> (idxItem01 sty      , try $ parseStyleDef "item_01")
    <|?> (idxItem12 sty      , try $ parseStyleDef "item_12")
    <|?> (idxDelim0 sty      , try $ parseStyleDef "delim_0")
    <|?> (idxDelim1 sty      , try $ parseStyleDef "delim_1")
    <|?> (idxDelim2 sty      , try $ parseStyleDef "delim_2")
    <|?> (idxDelimn sty      , try $ parseStyleDef "delim_n")
    <|?> (idxDelimr sty      , try $ parseStyleDef "delim_r")
    <|?> (idxEncapPre sty    , try $ parseStyleDef "encap_infix")
    <|?> (idxEncapSuf sty    , try $ parseStyleDef "encap_suffix")

    )
  eof -- the end of file
  return sty


-- | Parse many empty lines.
emptyLines = many emptyLine


-- | Parse an empty line.
emptyLine = do
  many (oneOf " \t") -- possibly some spaces and tabulations.
  endOfLineP         -- The end of line


-- | Parse a style definition
--
-- > item_0 "my style definition"
--
parseStyleDef :: String                             -- ^ The name of the style
              -> ParsecT String () Identity  T.Text -- ^ The definition of the style.
parseStyleDef str = do
  string str
  many1 (char ' ')
  def <- between (char '"') (char '"') (many1 $ noneOf "\r\n\t\"")
  many (char ' ')
  endOfLineP
  emptyLines
  return (literalNewLine $ T.pack def)



parseStyleDefHead str = try (parseStyleDefHeadNum str) 
                    <|> try (parseStyleDefHeadNone str) 
                    <|> try (parseStyleDefHeadUpper str) 
                    <|>     (parseStyleDefHeadLower str)



parseStyleDefHeadNone :: String -> ParsecT String () Identity Heading
parseStyleDefHeadNone str = do
  string str
  many1 (char ' ')
  s <- string "None"
  many (char ' ')
  endOfLineP
  emptyLines
  return None


parseStyleDefHeadUpper :: String -> ParsecT String () Identity Heading
parseStyleDefHeadUpper str = do
  string str
  many1 (char ' ')
  s <- string "Upper"
  many (char ' ')
  endOfLineP
  emptyLines
  return UpperCase


parseStyleDefHeadLower :: String -> ParsecT String () Identity Heading
parseStyleDefHeadLower str = do
  string str
  many1 (char ' ')
  s <- string "Lower"
  many (char ' ')
  endOfLineP
  emptyLines
  return LowerCase


parseStyleDefHeadNum :: String -> ParsecT String () Identity Heading
parseStyleDefHeadNum str = do
  string str
  many1 (char ' ')
  s <- option ' ' (char '-')
  h <- many1 digit
  many (char ' ')
  endOfLineP
  emptyLines
  return (val2Heading (read (s : h)))



val2Heading 0 = None
val2Heading n = if n > 0 
  then UpperCase 
  else LowerCase


-- | Try to parse a IeC LaTeX substitution.
-- Return the associated character if succeed.
lstParseIeC lst = try $ do
  string "\\IeC"
  many (char ' ')
  choice $ map (\(s, r) ->
          (try $ do
             braces $ do
                many (char ' ')
                char '\\'
                string s
                many (char ' ')
                return r
          )) lst




-- | Parse a number
parseNumber :: ParsecT String () Identity Char
parseNumber = try digit

-- | Parse a symbol
parseSymbol :: ParsecT String () Identity Char
parseSymbol = try (oneOf allowedSymb)


-- | Parse an hyphen character
parseHyph :: ParsecT String () Identity Char
parseHyph = try (oneOf lstHyph)


parseAnything :: ParsecT String () Identity Char
parseAnything = try (noneOf forbiddenSymb)



braces = between (char '{') (char '}')

braces' = between (char '{') (try $ do char '}';lookAhead (char '{' ))


-- | Parse a single entry command from "imakeidx" LaTeX package.
parseIDX :: ParsecT String () Identity Char -> ParsecT String () Identity IndexItem
parseIDX pars = do
  string "\\indexentry"
  many (char ' ')
  ((itm,itmE),com) <- braces'
    (do
      itm <- sepArobase <$> many1 (notFollowedBy (do char '}';char '{') >> pars)
      com <- option "" $ try $do
                   char '|'
                   optional (char '(')
                   many (notFollowedBy (do char '}';char '{') >> parseAnything)
      return (itm,com)
    )
  many (char ' ')
  n <- braces (many1 digit)
  return (IndexItem (T.pack itm) (Letters, T.pack itmE) (T.pack com) [read n] [])


-- | Parse a entry command containing a subentry from "imakeidx" LaTeX package.
parseIDXSub :: ParsecT String () Identity Char -> ParsecT String () Identity IndexItem
parseIDXSub pars = do
  string "\\indexentry"
  many (char ' ')
  ((itm,itmE), (sub,subE),com) <- braces'
    (do
      itm <- sepArobase <$> many1 (notFollowedBy (do char '}';char '{') >> pars)
      char '!'
      sub <- sepArobase <$> many1 (notFollowedBy (do char '}';char '{') >> pars)
      -- ~ string "|hyperpage"
      com <- option "" $ try $do
                   char '|'
                   optional (char '(')
                   many (notFollowedBy (do char '}';char '{') >> parseAnything)
      return (itm, sub,com)
    )
  many (char ' ')
  n <- braces (many1 digit)
  return (IndexItem (T.pack itm) (Letters, T.pack itmE) T.empty [] [IndexSubItem (T.pack sub) (Letters, T.pack subE) (T.pack com) [read n] []])


-- | Parse a entry command containing a subsubentry from "imakeidx" LaTeX package.
parseIDXSubSub :: ParsecT String () Identity Char -> ParsecT String () Identity IndexItem
parseIDXSubSub pars = do
  string "\\indexentry"
  many (char ' ')
  ((itm,itmE), (sub,subE), (ssub,ssubE),com) <- braces'
    (do
      itm <- sepArobase <$> many1 (notFollowedBy (do char '}';char '{') >> pars)
      char '!'
      sub <- sepArobase <$> many1 (notFollowedBy (do char '}';char '{') >> pars)
      char '!'
      ssub <- sepArobase <$> many1 (notFollowedBy (do char '}';char '{') >> pars)
      -- ~ string "|hyperpage"
      com <- option "" $ try $ do
                   char '|'
                   optional (char '(')
                   many (notFollowedBy (do char '}';char '{') >> parseAnything)
      return (itm, sub, ssub,com)
    )
  many (char ' ')
  n <- braces (many1 digit)
  return (IndexItem (T.pack itm) (Letters, T.pack itmE) T.empty [] [IndexSubItem (T.pack sub) (Letters, T.pack subE) T.empty [] [IndexSubSubItem (T.pack ssub) (Letters, T.pack ssubE) (T.pack com)  [read n]]])


-- | Parse all possible forms of entry from "imakeidx" LaTeX package.
parseIndexItem pars = try (parseIDXSubSub pars) 
                  <|> try (parseIDXSub pars) 
                  <|>      parseIDX pars


parseIndexFile :: ParsecT String () Identity [IndexItem]
parseIndexFile = do
  emptyLines
  itms <- endBy (parseIndexItem parseCharL) endOfLineP
  emptyLines
  eof
  return itms




-- | Parse a end of line in both UNIX and WINDOWS format.
-- ~ endOfLineP :: ParsecT String () Identity String
endOfLineP = try (string "\n")    -- Fin de ligne Unix/Linux (LF : Line feed)
         <|> try (string "\r\n")  -- Fin de ligne Windows (CRLF : Carriage return Line feed)



-- | Standard parser for chars.
--
-- Try to parse :
--
-- 1. The specific char output from the "imakeidx" LaTeX package.
--
-- 2. Numbers
--
-- 3. 
parseCharL :: ParsecT String () Identity Char
parseCharL =  lstParseIeC lstLaTeXSubs
          <|>  parseAnything




-- | Parse a file containing the lists of chars defining a language.
--
parseLanguageFile ::  ParsecT String PermState Identity LangDef
parseLanguageFile = do
  emptyLines -- Possibles emptylines at the beginning of the file
  -- ~ putState emptyPermState
  def <- permute
    (    (\a b c d -> LangDef a b c d []) -- all possible permutations
    <$$> try (parseCharDefLetters ) -- <$$>
    <|?> ([], try $ parseCharDefNumbers )
    <|?> (Nothing, try $ parseCharDefSymbols )
    <|?> ([], try $ parseSubstitutions )
    )
  eof -- the end of file
  stat <- getState
  return def{lstSecOrder=order stat}


-- | Parse a char list definition.
--
-- A char list is defined by:
--
-- * A title
--
-- * A list of chars describing the sorting order of letters of this language.
--
parseCharDefLetters  = do
  string "LETTERS"
  endOfLineP
  chrs <- many (noneOf "\n\t")
  endOfLineP
  emptyLines
  modifyState (\st -> st{order=order st++[Letters]})
  return ( chrs)


parseCharDefNumbers  = do
  string "NUMBERS"
  endOfLineP
  chrs <- many (noneOf "\n\t")
  endOfLineP
  emptyLines
  modifyState (\st -> st{order=order st++[Numbers]})
  return ( chrs)


-- | Parse a char list definition.
--
-- A char list is defined by:
--
-- * A title
--
-- * A list of chars describing the sorting order of letters of this language.
--
parseCharDefSymbols  = do
  string "SYMBOLS"
  endOfLineP
  chrs <- many (noneOf "\n\t")
  endOfLineP
  emptyLines
  modifyState (\st -> st{order=order st++[Symbols]})
  return (Just ( chrs))


-- | Parse a list of substitutions
--
--  A substitution give an equivalent string to a char. A list of substitution is defined by :
--
-- * A title
--
-- * A list of substitutions :
--
--     > œ->oe
--     > à->a
--     > ê->e
--
-- note: The arrow -> musn't be preceded or followed by spaces.
--
-- A special char can be substituted by a space with the following substitution.
--
--     > _-> 
--     > --> 
--
parseSubstitutions  = do
  string "SUBSTITUTIONS"
  endOfLineP
  repl <- many1 parseSubstitution
  emptyLines
  return repl -- (replaUpperLower repl)

parseSubstitution = do
  cha <- noneOf "\r\n\t"
  string "->"
  str <- many1 (noneOf "\r\n\t")
  -- many (char ' ')
  endOfLineP
  return (cha, str)


-- | The empty list of chars.
emptyDef = LangDef [] [] Nothing [] []