-- Copyright 2013 Evan Laforge
-- This program is distributed under the terms of the GNU General Public
-- License 3.0, see COPYING or http://www.gnu.org/licenses/gpl-3.0.txt

-- | More user friendly regex api for PCRE regexes.
module Util.Regex (
    Regex
    -- * compile
    , Option(..)
    , compile, compileOptions, compileUnsafe, compileOptionsUnsafe

    -- * matching
    , matches, groups, groupRanges
    -- * substitute
    , substitute, substituteGroups

    -- * misc
    , escape
) where
import qualified Data.ByteString as ByteString
import qualified Data.Text as Text
import           Data.Text (Text)
import qualified Data.Text.Encoding as Encoding

import qualified GHC.Stack as Stack
import qualified Text.Regex.PCRE.Heavy as PCRE
import           Text.Regex.PCRE.Heavy (Regex)
import qualified Text.Regex.PCRE.Light as PCRE


-- * compile

fromText :: Text -> ByteString.ByteString
fromText :: Text -> SBS
fromText = Text -> SBS
Encoding.encodeUtf8

data Option = CaseInsensitive | DotAll | Multiline
    deriving (Eq Option
Eq Option
-> (Option -> Option -> Ordering)
-> (Option -> Option -> Bool)
-> (Option -> Option -> Bool)
-> (Option -> Option -> Bool)
-> (Option -> Option -> Bool)
-> (Option -> Option -> Option)
-> (Option -> Option -> Option)
-> Ord Option
Option -> Option -> Bool
Option -> Option -> Ordering
Option -> Option -> Option
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Option -> Option -> Option
$cmin :: Option -> Option -> Option
max :: Option -> Option -> Option
$cmax :: Option -> Option -> Option
>= :: Option -> Option -> Bool
$c>= :: Option -> Option -> Bool
> :: Option -> Option -> Bool
$c> :: Option -> Option -> Bool
<= :: Option -> Option -> Bool
$c<= :: Option -> Option -> Bool
< :: Option -> Option -> Bool
$c< :: Option -> Option -> Bool
compare :: Option -> Option -> Ordering
$ccompare :: Option -> Option -> Ordering
Ord, Option -> Option -> Bool
(Option -> Option -> Bool)
-> (Option -> Option -> Bool) -> Eq Option
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Option -> Option -> Bool
$c/= :: Option -> Option -> Bool
== :: Option -> Option -> Bool
$c== :: Option -> Option -> Bool
Eq, Int -> Option -> ShowS
[Option] -> ShowS
Option -> String
(Int -> Option -> ShowS)
-> (Option -> String) -> ([Option] -> ShowS) -> Show Option
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Option] -> ShowS
$cshowList :: [Option] -> ShowS
show :: Option -> String
$cshow :: Option -> String
showsPrec :: Int -> Option -> ShowS
$cshowsPrec :: Int -> Option -> ShowS
Show)

compile :: String -> Either String Regex
compile :: String -> Either String Regex
compile = [Option] -> String -> Either String Regex
compileOptions []

compileOptions :: [Option] -> String -> Either String Regex
compileOptions :: [Option] -> String -> Either String Regex
compileOptions [Option]
options String
text =
    case SBS -> [PCREOption] -> Either String Regex
PCRE.compileM (Text -> SBS
fromText (String -> Text
Text.pack String
text)) ([Option] -> [PCREOption]
convertOptions [Option]
options) of
        Left String
msg -> String -> Either String Regex
forall a b. a -> Either a b
Left (String -> Either String Regex) -> String -> Either String Regex
forall a b. (a -> b) -> a -> b
$ String
"compiling regex " String -> ShowS
forall a. [a] -> [a] -> [a]
++ ShowS
forall a. Show a => a -> String
show String
text String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
": " String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
msg
        Right Regex
regex -> Regex -> Either String Regex
forall a b. b -> Either a b
Right Regex
regex

convertOptions :: [Option] -> [PCRE.PCREOption]
convertOptions :: [Option] -> [PCREOption]
convertOptions = ([PCREOption]
options++) ([PCREOption] -> [PCREOption])
-> ([Option] -> [PCREOption]) -> [Option] -> [PCREOption]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Option -> PCREOption) -> [Option] -> [PCREOption]
forall a b. (a -> b) -> [a] -> [b]
map Option -> PCREOption
convert
    where
    convert :: Option -> PCREOption
convert Option
opt = case Option
opt of
        Option
CaseInsensitive -> PCREOption
PCRE.caseless
        Option
DotAll -> PCREOption
PCRE.dotall
        Option
Multiline -> PCREOption
PCRE.multiline
    options :: [PCREOption]
options = [PCREOption
PCRE.utf8, PCREOption
PCRE.no_utf8_check]

-- | Will throw a runtime error if the regex has an error!
compileUnsafe :: Stack.HasCallStack => String -> Regex
compileUnsafe :: HasCallStack => String -> Regex
compileUnsafe = HasCallStack => [Option] -> String -> Regex
[Option] -> String -> Regex
compileOptionsUnsafe []

-- | Will throw a runtime error if the regex has an error!
compileOptionsUnsafe :: Stack.HasCallStack => [Option] -> String -> Regex
compileOptionsUnsafe :: HasCallStack => [Option] -> String -> Regex
compileOptionsUnsafe [Option]
options = (String -> Regex)
-> (Regex -> Regex) -> Either String Regex -> Regex
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either String -> Regex
forall a. HasCallStack => String -> a
error Regex -> Regex
forall a. a -> a
id (Either String Regex -> Regex)
-> (String -> Either String Regex) -> String -> Regex
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Option] -> String -> Either String Regex
compileOptions [Option]
options

-- * match

matches :: Regex -> Text -> Bool
matches :: Regex -> Text -> Bool
matches = (Text -> Regex -> Bool) -> Regex -> Text -> Bool
forall a b c. (a -> b -> c) -> b -> a -> c
flip Text -> Regex -> Bool
forall a.
(ConvertibleStrings SBS a, ConvertibleStrings a SBS) =>
a -> Regex -> Bool
(PCRE.=~)

-- | Return (complete_match, [group_match]).
groups :: Regex -> Text -> [(Text, [Text])]
groups :: Regex -> Text -> [(Text, [Text])]
groups = Regex -> Text -> [(Text, [Text])]
forall a.
(ConvertibleStrings SBS a, ConvertibleStrings a SBS) =>
Regex -> a -> [(a, [a])]
PCRE.scan

-- | Half-open ranges of where the regex matches.
groupRanges :: Regex -> Text -> [((Int, Int), [(Int, Int)])]
    -- ^ (entire, [group])
groupRanges :: Regex -> Text -> [((Int, Int), [(Int, Int)])]
groupRanges = Regex -> Text -> [((Int, Int), [(Int, Int)])]
forall a.
(ConvertibleStrings SBS a, ConvertibleStrings a SBS) =>
Regex -> a -> [((Int, Int), [(Int, Int)])]
PCRE.scanRanges

-- * substitute

-- | TODO this is not the usual thing where it replaces \1 \2 etc., but
-- it replaces the entire match.
substitute :: Regex -> Text -> Text -> Text
substitute :: Regex -> Text -> Text -> Text
substitute Regex
regex Text
sub = Regex -> Text -> Text -> Text
forall a r.
(ConvertibleStrings SBS a, ConvertibleStrings a SBS,
 RegexReplacement r) =>
Regex -> r -> a -> a
PCRE.gsub Regex
regex Text
sub

substituteGroups :: Regex -> (Text -> [Text] -> Text)
    -- ^ (complete_match -> groups -> replacement)
    -> Text -> Text
substituteGroups :: Regex -> (Text -> [Text] -> Text) -> Text -> Text
substituteGroups = Regex -> (Text -> [Text] -> Text) -> Text -> Text
forall a r.
(ConvertibleStrings SBS a, ConvertibleStrings a SBS,
 RegexReplacement r) =>
Regex -> r -> a -> a
PCRE.gsub

-- * misc

-- | Escape a string so the regex matches it literally.
escape :: String -> String
escape :: ShowS
escape String
"" = String
""
escape (Char
c : String
cs)
    | Char
c Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` (String
"\\^$.[|()?*+{" :: [Char]) = Char
'\\' Char -> ShowS
forall a. a -> [a] -> [a]
: Char
c Char -> ShowS
forall a. a -> [a] -> [a]
: ShowS
escape String
cs
    | Bool
otherwise = Char
c Char -> ShowS
forall a. a -> [a] -> [a]
: ShowS
escape String
cs