{-# LANGUAGE Trustworthy #-}
{-# LANGUAGE NoImplicitPrelude #-}
module Text.Read.Lex
( Lexeme(..), Number
, numberToInteger, numberToFixed, numberToRational, numberToRangedRational
, lex, expect
, hsLex
, lexChar
, readIntP
, readOctP
, readDecP
, readHexP
, isSymbolChar
)
where
import Text.ParserCombinators.ReadP
import GHC.Base
import GHC.Char
import GHC.Num( Num(..), Integer )
import GHC.Show( Show(..) )
import GHC.Unicode
( GeneralCategory(..), generalCategory, isSpace, isAlpha, isAlphaNum )
import GHC.Real( Rational, (%), fromIntegral, Integral,
toInteger, (^), quot, even )
import GHC.List
import GHC.Enum( minBound, maxBound )
import Data.Maybe
guard :: (MonadPlus m) => Bool -> m ()
guard True = return ()
guard False = mzero
data Lexeme
= Char Char
| String String
| Punc String
| Ident String
| Symbol String
| Number Number
| EOF
deriving (Eq, Show)
data Number = MkNumber Int
Digits
| MkDecimal Digits
(Maybe Digits)
(Maybe Integer)
deriving (Eq, Show)
numberToInteger :: Number -> Maybe Integer
numberToInteger (MkNumber base iPart) = Just (val (fromIntegral base) iPart)
numberToInteger (MkDecimal iPart Nothing Nothing) = Just (val 10 iPart)
numberToInteger _ = Nothing
numberToFixed :: Integer -> Number -> Maybe (Integer, Integer)
numberToFixed _ (MkNumber base iPart) = Just (val (fromIntegral base) iPart, 0)
numberToFixed _ (MkDecimal iPart Nothing Nothing) = Just (val 10 iPart, 0)
numberToFixed p (MkDecimal iPart (Just fPart) Nothing)
= let i = val 10 iPart
f = val 10 (integerTake p (fPart ++ repeat 0))
integerTake :: Integer -> [a] -> [a]
integerTake n _ | n <= 0 = []
integerTake _ [] = []
integerTake n (x:xs) = x : integerTake (n-1) xs
in Just (i, f)
numberToFixed _ _ = Nothing
numberToRangedRational :: (Int, Int) -> Number
-> Maybe Rational
numberToRangedRational (neg, pos) n@(MkDecimal iPart mFPart (Just exp))
| exp > fromIntegral (maxBound :: Int) ||
exp < fromIntegral (minBound :: Int)
= Nothing
| otherwise
= let mFirstDigit = case dropWhile (0 ==) iPart of
iPart'@(_ : _) -> Just (length iPart')
[] -> case mFPart of
Nothing -> Nothing
Just fPart ->
case span (0 ==) fPart of
(_, []) -> Nothing
(zeroes, _) ->
Just (negate (length zeroes))
in case mFirstDigit of
Nothing -> Just 0
Just firstDigit ->
let firstDigit' = firstDigit + fromInteger exp
in if firstDigit' > (pos + 3)
then Nothing
else if firstDigit' < (neg - 3)
then Just 0
else Just (numberToRational n)
numberToRangedRational _ n = Just (numberToRational n)
numberToRational :: Number -> Rational
numberToRational (MkNumber base iPart) = val (fromIntegral base) iPart % 1
numberToRational (MkDecimal iPart mFPart mExp)
= let i = val 10 iPart
in case (mFPart, mExp) of
(Nothing, Nothing) -> i % 1
(Nothing, Just exp)
| exp >= 0 -> (i * (10 ^ exp)) % 1
| otherwise -> i % (10 ^ (- exp))
(Just fPart, Nothing) -> fracExp 0 i fPart
(Just fPart, Just exp) -> fracExp exp i fPart
lex :: ReadP Lexeme
lex = skipSpaces >> lexToken
expect :: Lexeme -> ReadP ()
expect lexeme = do { skipSpaces
; thing <- lexToken
; if thing == lexeme then return () else pfail }
hsLex :: ReadP String
hsLex = do skipSpaces
(s,_) <- gather lexToken
return s
lexToken :: ReadP Lexeme
lexToken = lexEOF +++
lexLitChar +++
lexString +++
lexPunc +++
lexSymbol +++
lexId +++
lexNumber
lexEOF :: ReadP Lexeme
lexEOF = do s <- look
guard (null s)
return EOF
lexPunc :: ReadP Lexeme
lexPunc =
do c <- satisfy isPuncChar
return (Punc [c])
isPuncChar :: Char -> Bool
isPuncChar c = c `elem` ",;()[]{}`"
lexSymbol :: ReadP Lexeme
lexSymbol =
do s <- munch1 isSymbolChar
if s `elem` reserved_ops then
return (Punc s)
else
return (Symbol s)
where
reserved_ops = ["..", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>"]
isSymbolChar :: Char -> Bool
isSymbolChar c = not (isPuncChar c) && case generalCategory c of
MathSymbol -> True
CurrencySymbol -> True
ModifierSymbol -> True
OtherSymbol -> True
DashPunctuation -> True
OtherPunctuation -> not (c `elem` "'\"")
ConnectorPunctuation -> c /= '_'
_ -> False
lexId :: ReadP Lexeme
lexId = do c <- satisfy isIdsChar
s <- munch isIdfChar
return (Ident (c:s))
where
isIdsChar c = isAlpha c || c == '_'
isIdfChar c = isAlphaNum c || c `elem` "_'"
lexLitChar :: ReadP Lexeme
lexLitChar =
do _ <- char '\''
(c,esc) <- lexCharE
guard (esc || c /= '\'')
_ <- char '\''
return (Char c)
lexChar :: ReadP Char
lexChar = do { (c,_) <- lexCharE; consumeEmpties; return c }
where
consumeEmpties :: ReadP ()
consumeEmpties = do
rest <- look
case rest of
('\\':'&':_) -> string "\\&" >> consumeEmpties
_ -> return ()
lexCharE :: ReadP (Char, Bool)
lexCharE =
do c1 <- get
if c1 == '\\'
then do c2 <- lexEsc; return (c2, True)
else do return (c1, False)
where
lexEsc =
lexEscChar
+++ lexNumeric
+++ lexCntrlChar
+++ lexAscii
lexEscChar =
do c <- get
case c of
'a' -> return '\a'
'b' -> return '\b'
'f' -> return '\f'
'n' -> return '\n'
'r' -> return '\r'
't' -> return '\t'
'v' -> return '\v'
'\\' -> return '\\'
'\"' -> return '\"'
'\'' -> return '\''
_ -> pfail
lexNumeric =
do base <- lexBaseChar <++ return 10
n <- lexInteger base
guard (n <= toInteger (ord maxBound))
return (chr (fromInteger n))
lexCntrlChar =
do _ <- char '^'
c <- get
case c of
'@' -> return '\^@'
'A' -> return '\^A'
'B' -> return '\^B'
'C' -> return '\^C'
'D' -> return '\^D'
'E' -> return '\^E'
'F' -> return '\^F'
'G' -> return '\^G'
'H' -> return '\^H'
'I' -> return '\^I'
'J' -> return '\^J'
'K' -> return '\^K'
'L' -> return '\^L'
'M' -> return '\^M'
'N' -> return '\^N'
'O' -> return '\^O'
'P' -> return '\^P'
'Q' -> return '\^Q'
'R' -> return '\^R'
'S' -> return '\^S'
'T' -> return '\^T'
'U' -> return '\^U'
'V' -> return '\^V'
'W' -> return '\^W'
'X' -> return '\^X'
'Y' -> return '\^Y'
'Z' -> return '\^Z'
'[' -> return '\^['
'\\' -> return '\^\'
']' -> return '\^]'
'^' -> return '\^^'
'_' -> return '\^_'
_ -> pfail
lexAscii =
do choice
[ (string "SOH" >> return '\SOH') <++
(string "SO" >> return '\SO')
, string "NUL" >> return '\NUL'
, string "STX" >> return '\STX'
, string "ETX" >> return '\ETX'
, string "EOT" >> return '\EOT'
, string "ENQ" >> return '\ENQ'
, string "ACK" >> return '\ACK'
, string "BEL" >> return '\BEL'
, string "BS" >> return '\BS'
, string "HT" >> return '\HT'
, string "LF" >> return '\LF'
, string "VT" >> return '\VT'
, string "FF" >> return '\FF'
, string "CR" >> return '\CR'
, string "SI" >> return '\SI'
, string "DLE" >> return '\DLE'
, string "DC1" >> return '\DC1'
, string "DC2" >> return '\DC2'
, string "DC3" >> return '\DC3'
, string "DC4" >> return '\DC4'
, string "NAK" >> return '\NAK'
, string "SYN" >> return '\SYN'
, string "ETB" >> return '\ETB'
, string "CAN" >> return '\CAN'
, string "EM" >> return '\EM'
, string "SUB" >> return '\SUB'
, string "ESC" >> return '\ESC'
, string "FS" >> return '\FS'
, string "GS" >> return '\GS'
, string "RS" >> return '\RS'
, string "US" >> return '\US'
, string "SP" >> return '\SP'
, string "DEL" >> return '\DEL'
]
lexString :: ReadP Lexeme
lexString =
do _ <- char '"'
body id
where
body f =
do (c,esc) <- lexStrItem
if c /= '"' || esc
then body (f.(c:))
else let s = f "" in
return (String s)
lexStrItem = (lexEmpty >> lexStrItem)
+++ lexCharE
lexEmpty =
do _ <- char '\\'
c <- get
case c of
'&' -> do return ()
_ | isSpace c -> do skipSpaces; _ <- char '\\'; return ()
_ -> do pfail
type Base = Int
type Digits = [Int]
lexNumber :: ReadP Lexeme
lexNumber
= lexHexOct <++
lexDecNumber
lexHexOct :: ReadP Lexeme
lexHexOct
= do _ <- char '0'
base <- lexBaseChar
digits <- lexDigits base
return (Number (MkNumber base digits))
lexBaseChar :: ReadP Int
lexBaseChar = do { c <- get;
case c of
'o' -> return 8
'O' -> return 8
'x' -> return 16
'X' -> return 16
_ -> pfail }
lexDecNumber :: ReadP Lexeme
lexDecNumber =
do xs <- lexDigits 10
mFrac <- lexFrac <++ return Nothing
mExp <- lexExp <++ return Nothing
return (Number (MkDecimal xs mFrac mExp))
lexFrac :: ReadP (Maybe Digits)
lexFrac = do _ <- char '.'
fraction <- lexDigits 10
return (Just fraction)
lexExp :: ReadP (Maybe Integer)
lexExp = do _ <- char 'e' +++ char 'E'
exp <- signedExp +++ lexInteger 10
return (Just exp)
where
signedExp
= do c <- char '-' +++ char '+'
n <- lexInteger 10
return (if c == '-' then -n else n)
lexDigits :: Int -> ReadP Digits
lexDigits base =
do s <- look
xs <- scan s id
guard (not (null xs))
return xs
where
scan (c:cs) f = case valDig base c of
Just n -> do _ <- get; scan cs (f.(n:))
Nothing -> do return (f [])
scan [] f = do return (f [])
lexInteger :: Base -> ReadP Integer
lexInteger base =
do xs <- lexDigits base
return (val (fromIntegral base) xs)
val :: Num a => a -> Digits -> a
val = valSimple
{-# RULES
"val/Integer" val = valInteger
#-}
{-# INLINE [1] val #-}
valSimple :: (Num a, Integral d) => a -> [d] -> a
valSimple base = go 0
where
go r [] = r
go r (d : ds) = r' `seq` go r' ds
where
r' = r * base + fromIntegral d
{-# INLINE valSimple #-}
valInteger :: Integer -> Digits -> Integer
valInteger b0 ds0 = go b0 (length ds0) $ map fromIntegral ds0
where
go _ _ [] = 0
go _ _ [d] = d
go b l ds
| l > 40 = b' `seq` go b' l' (combine b ds')
| otherwise = valSimple b ds
where
ds' = if even l then ds else 0 : ds
b' = b * b
l' = (l + 1) `quot` 2
combine b (d1 : d2 : ds) = d `seq` (d : combine b ds)
where
d = d1 * b + d2
combine _ [] = []
combine _ [_] = errorWithoutStackTrace "this should not happen"
fracExp :: Integer -> Integer -> Digits -> Rational
fracExp exp mant []
| exp < 0 = mant % (10 ^ (-exp))
| otherwise = fromInteger (mant * 10 ^ exp)
fracExp exp mant (d:ds) = exp' `seq` mant' `seq` fracExp exp' mant' ds
where
exp' = exp - 1
mant' = mant * 10 + fromIntegral d
valDig :: (Eq a, Num a) => a -> Char -> Maybe Int
valDig 8 c
| '0' <= c && c <= '7' = Just (ord c - ord '0')
| otherwise = Nothing
valDig 10 c = valDecDig c
valDig 16 c
| '0' <= c && c <= '9' = Just (ord c - ord '0')
| 'a' <= c && c <= 'f' = Just (ord c - ord 'a' + 10)
| 'A' <= c && c <= 'F' = Just (ord c - ord 'A' + 10)
| otherwise = Nothing
valDig _ _ = errorWithoutStackTrace "valDig: Bad base"
valDecDig :: Char -> Maybe Int
valDecDig c
| '0' <= c && c <= '9' = Just (ord c - ord '0')
| otherwise = Nothing
readIntP :: Num a => a -> (Char -> Bool) -> (Char -> Int) -> ReadP a
readIntP base isDigit valDigit =
do s <- munch1 isDigit
return (val base (map valDigit s))
{-# SPECIALISE readIntP
:: Integer -> (Char -> Bool) -> (Char -> Int) -> ReadP Integer #-}
readIntP' :: (Eq a, Num a) => a -> ReadP a
readIntP' base = readIntP base isDigit valDigit
where
isDigit c = maybe False (const True) (valDig base c)
valDigit c = maybe 0 id (valDig base c)
{-# SPECIALISE readIntP' :: Integer -> ReadP Integer #-}
readOctP, readDecP, readHexP :: (Eq a, Num a) => ReadP a
readOctP = readIntP' 8
readDecP = readIntP' 10
readHexP = readIntP' 16
{-# SPECIALISE readOctP :: ReadP Integer #-}
{-# SPECIALISE readDecP :: ReadP Integer #-}
{-# SPECIALISE readHexP :: ReadP Integer #-}