-- Copyright 2013 Evan Laforge
-- This program is distributed under the terms of the GNU General Public
-- License 3.0, see COPYING or http://www.gnu.org/licenses/gpl-3.0.txt

-- | More user friendly regex api for PCRE regexes.
module Util.Regex (
    Regex
    -- * compile
    , Option(..)
    , compile, compileOptions, compileUnsafe, compileOptionsUnsafe

    -- * matching
    , matches, groups, groupRanges
    -- * substitute
    , substitute, substituteGroups

    -- * misc
    , escape
) where
import qualified Data.ByteString as ByteString
import qualified Data.Text as Text
import           Data.Text (Text)
import qualified Data.Text.Encoding as Encoding

import qualified GHC.Stack as Stack
import qualified Text.Regex.PCRE.Heavy as PCRE
import           Text.Regex.PCRE.Heavy (Regex)
import qualified Text.Regex.PCRE.Light as PCRE


-- * compile

fromText :: Text -> ByteString.ByteString
fromText :: Text -> SBS
fromText = Text -> SBS
Encoding.encodeUtf8

data Option = CaseInsensitive | DotAll | Multiline
    deriving (Eq Option
Option -> Option -> Bool
Option -> Option -> Ordering
Option -> Option -> Option
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Option -> Option -> Option
$cmin :: Option -> Option -> Option
max :: Option -> Option -> Option
$cmax :: Option -> Option -> Option
>= :: Option -> Option -> Bool
$c>= :: Option -> Option -> Bool
> :: Option -> Option -> Bool
$c> :: Option -> Option -> Bool
<= :: Option -> Option -> Bool
$c<= :: Option -> Option -> Bool
< :: Option -> Option -> Bool
$c< :: Option -> Option -> Bool
compare :: Option -> Option -> Ordering
$ccompare :: Option -> Option -> Ordering
Ord, Option -> Option -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Option -> Option -> Bool
$c/= :: Option -> Option -> Bool
== :: Option -> Option -> Bool
$c== :: Option -> Option -> Bool
Eq, Int -> Option -> ShowS
[Option] -> ShowS
Option -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Option] -> ShowS
$cshowList :: [Option] -> ShowS
show :: Option -> String
$cshow :: Option -> String
showsPrec :: Int -> Option -> ShowS
$cshowsPrec :: Int -> Option -> ShowS
Show)

compile :: String -> Either String Regex
compile :: String -> Either String Regex
compile = [Option] -> String -> Either String Regex
compileOptions []

compileOptions :: [Option] -> String -> Either String Regex
compileOptions :: [Option] -> String -> Either String Regex
compileOptions [Option]
options String
text =
    case SBS -> [PCREOption] -> Either String Regex
PCRE.compileM (Text -> SBS
fromText (String -> Text
Text.pack String
text)) ([Option] -> [PCREOption]
convertOptions [Option]
options) of
        Left String
msg -> forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ String
"compiling regex " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show String
text forall a. [a] -> [a] -> [a]
++ String
": " forall a. [a] -> [a] -> [a]
++ String
msg
        Right Regex
regex -> forall a b. b -> Either a b
Right Regex
regex

convertOptions :: [Option] -> [PCRE.PCREOption]
convertOptions :: [Option] -> [PCREOption]
convertOptions = ([PCREOption]
options++) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map Option -> PCREOption
convert
    where
    convert :: Option -> PCREOption
convert Option
opt = case Option
opt of
        Option
CaseInsensitive -> PCREOption
PCRE.caseless
        Option
DotAll -> PCREOption
PCRE.dotall
        Option
Multiline -> PCREOption
PCRE.multiline
    options :: [PCREOption]
options = [PCREOption
PCRE.utf8, PCREOption
PCRE.no_utf8_check]

-- | Will throw a runtime error if the regex has an error!
compileUnsafe :: Stack.HasCallStack => String -> Regex
compileUnsafe :: HasCallStack => String -> Regex
compileUnsafe = HasCallStack => [Option] -> String -> Regex
compileOptionsUnsafe []

-- | Will throw a runtime error if the regex has an error!
compileOptionsUnsafe :: Stack.HasCallStack => [Option] -> String -> Regex
compileOptionsUnsafe :: HasCallStack => [Option] -> String -> Regex
compileOptionsUnsafe [Option]
options = forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either forall a. HasCallStack => String -> a
error forall a. a -> a
id forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Option] -> String -> Either String Regex
compileOptions [Option]
options

-- * match

matches :: Regex -> Text -> Bool
matches :: Regex -> Text -> Bool
matches = forall a b c. (a -> b -> c) -> b -> a -> c
flip forall a. ConvertibleStrings a SBS => a -> Regex -> Bool
(PCRE.=~)

-- | Return (complete_match, [group_match]).
groups :: Regex -> Text -> [(Text, [Text])]
groups :: Regex -> Text -> [(Text, [Text])]
groups = forall a.
(ConvertibleStrings SBS a, ConvertibleStrings a SBS) =>
Regex -> a -> [(a, [a])]
PCRE.scan

-- | Half-open ranges of where the regex matches.
groupRanges :: Regex -> Text -> [((Int, Int), [(Int, Int)])]
    -- ^ (entire, [group])
groupRanges :: Regex -> Text -> [((Int, Int), [(Int, Int)])]
groupRanges = forall a.
ConvertibleStrings a SBS =>
Regex -> a -> [((Int, Int), [(Int, Int)])]
PCRE.scanRanges

-- * substitute

-- | TODO this is not the usual thing where it replaces \1 \2 etc., but
-- it replaces the entire match.
substitute :: Regex -> Text -> Text -> Text
substitute :: Regex -> Text -> Text -> Text
substitute Regex
regex Text
sub = forall a r.
(ConvertibleStrings SBS a, ConvertibleStrings a SBS,
 RegexReplacement r) =>
Regex -> r -> a -> a
PCRE.gsub Regex
regex Text
sub

substituteGroups :: Regex -> (Text -> [Text] -> Text)
    -- ^ (complete_match -> groups -> replacement)
    -> Text -> Text
substituteGroups :: Regex -> (Text -> [Text] -> Text) -> Text -> Text
substituteGroups = forall a r.
(ConvertibleStrings SBS a, ConvertibleStrings a SBS,
 RegexReplacement r) =>
Regex -> r -> a -> a
PCRE.gsub

-- * misc

-- | Escape a string so the regex matches it literally.
escape :: String -> String
escape :: ShowS
escape String
"" = String
""
escape (Char
c : String
cs)
    | Char
c forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` (String
"\\^$.[|()?*+{" :: [Char]) = Char
'\\' forall a. a -> [a] -> [a]
: Char
c forall a. a -> [a] -> [a]
: ShowS
escape String
cs
    | Bool
otherwise = Char
c forall a. a -> [a] -> [a]
: ShowS
escape String
cs