Commit a18270a3 by Zachary Snow

many lexer fixes

- added support for macros with default arguments
- fixed bug where nested preprocessor conditionals would not be skipped
- macro expansion respects escaped quotation marks
- macro expansion considers whole identifiers, rather than substituting in wildly
parent f59ed11e
...@@ -24,7 +24,8 @@ import System.FilePath (dropFileName) ...@@ -24,7 +24,8 @@ import System.FilePath (dropFileName)
import System.Directory (findFile) import System.Directory (findFile)
import System.IO.Unsafe (unsafePerformIO) import System.IO.Unsafe (unsafePerformIO)
import qualified Data.Map.Strict as Map import qualified Data.Map.Strict as Map
import Data.List (findIndex, isPrefixOf) import Data.List (span, elemIndex, isPrefixOf, dropWhileEnd)
import Data.Maybe (isJust, fromJust)
import Language.SystemVerilog.Parser.Tokens import Language.SystemVerilog.Parser.Tokens
} }
...@@ -317,7 +318,7 @@ data Cond ...@@ -317,7 +318,7 @@ data Cond
data AlexUserState = LS data AlexUserState = LS
{ lsToks :: [Token] -- tokens read so far, *in reverse order* for efficiency { lsToks :: [Token] -- tokens read so far, *in reverse order* for efficiency
, lsCurrFile :: FilePath -- currently active filename , lsCurrFile :: FilePath -- currently active filename
, lsEnv :: Map.Map String (String, [String]) -- active macro definitions , lsEnv :: Map.Map String (String, [(String, Maybe String)]) -- active macro definitions
, lsCondStack :: [Cond] -- if-else cascade state , lsCondStack :: [Cond] -- if-else cascade state
, lsIncludePaths :: [FilePath] -- folders to search for includes , lsIncludePaths :: [FilePath] -- folders to search for includes
} deriving (Eq, Show) } deriving (Eq, Show)
...@@ -476,6 +477,21 @@ dropSpaces = do ...@@ -476,6 +477,21 @@ dropSpaces = do
' ' : rest -> alexSetInput (alexMove pos ' ', ' ', [], rest) ' ' : rest -> alexSetInput (alexMove pos ' ', ' ', [], rest)
ch : _ -> lexicalError $ "expected ' ', but found: " ++ show ch ch : _ -> lexicalError $ "expected ' ', but found: " ++ show ch
isWhitespaceChar :: Char -> Bool
isWhitespaceChar ch = elem ch [' ', '\t', '\n']
-- drop leading whitespace in the input
dropWhitespace :: Alex ()
dropWhitespace = do
(_, _, _, str) <- alexGetInput
if null str || not (isWhitespaceChar $ head str)
then return ()
else dropChar >> dropWhitespace
where
dropChar :: Alex ()
dropChar = do
(pos, _, _, ch : rest) <- alexGetInput
alexSetInput (alexMove pos ch, ch, [], rest)
-- removes and returns a quoted string such as <foo.bar> or "foo.bar" -- removes and returns a quoted string such as <foo.bar> or "foo.bar"
takeQuotedString :: Alex String takeQuotedString :: Alex String
...@@ -500,20 +516,7 @@ peekChar = do ...@@ -500,20 +516,7 @@ peekChar = do
then '\n' then '\n'
else head str else head str
takeMacroArgNames :: Alex [String] takeMacroDefinition :: Alex (String, [(String, Maybe String)])
takeMacroArgNames = do
dropSpaces
name <- takeString
dropSpaces
ch <- takeChar
rest <- case ch of
',' -> takeMacroArgNames
')' -> return []
_ -> lexicalError $ "unexpected char in macro defn. args: " ++ show ch
return $ name : rest
-- TODO FIXME: We don't currently support macro arguments with default values!
takeMacroDefinition :: Alex (String, [String])
takeMacroDefinition = do takeMacroDefinition = do
leadCh <- peekChar leadCh <- peekChar
if leadCh /= '(' if leadCh /= '('
...@@ -521,12 +524,27 @@ takeMacroDefinition = do ...@@ -521,12 +524,27 @@ takeMacroDefinition = do
body <- takeUntilNewline body <- takeUntilNewline
return (body, []) return (body, [])
else do else do
'(' <- takeChar args <- takeMacroArguments
args <- takeMacroArgNames
body <- takeUntilNewline body <- takeUntilNewline
argsWithDefaults <- mapM splitArg args
if null args if null args
then lexicalError "macros cannot have 0 args" then lexicalError "macros cannot have 0 args"
else return (body, args) else return (body, argsWithDefaults)
where
splitArg :: String -> Alex (String, Maybe String)
splitArg [] = lexicalError "macro defn. empty argument"
splitArg str = do
let (name, rest) = span isIdentChar str
if null name || not (all isIdentChar name) then
lexicalError $ "invalid macro arg name: " ++ show name
else if null rest then
return (name, Nothing)
else do
let trimmed = dropWhile isWhitespaceChar rest
let leadCh = head trimmed
if leadCh /= '='
then lexicalError $ "bad char after arg name: " ++ (show leadCh)
else return (name, Just $ tail trimmed)
-- commas and right parens are forbidden outside matched pairs of: (), [], {}, -- commas and right parens are forbidden outside matched pairs of: (), [], {},
-- "", except to delimit arguments or end the list of arguments; see 22.5.1 -- "", except to delimit arguments or end the list of arguments; see 22.5.1
...@@ -534,19 +552,27 @@ takeMacroArguments :: Alex [String] ...@@ -534,19 +552,27 @@ takeMacroArguments :: Alex [String]
takeMacroArguments = do takeMacroArguments = do
dropSpaces dropSpaces
'(' <- takeChar '(' <- takeChar
loop "" [] argLoop
where where
loop :: String -> [Char] -> Alex [String] argLoop :: Alex [String]
argLoop = do
dropWhitespace
(arg, isEnd) <- loop "" []
let arg' = dropWhileEnd isWhitespaceChar arg
if isEnd
then return [arg']
else do
rest <- argLoop
return $ arg' : rest
loop :: String -> [Char] -> Alex (String, Bool)
loop curr stack = do loop curr stack = do
ch <- takeChar ch <- takeChar
case (stack, ch) of case (stack, ch) of
( s,'\\') -> do ( s,'\\') -> do
ch2 <- takeChar ch2 <- takeChar
loop (curr ++ [ch, ch2]) s loop (curr ++ [ch, ch2]) s
([ ], ',') -> do ([ ], ',') -> return (curr, False)
rest <- loop "" stack ([ ], ')') -> return (curr, True)
return $ curr : rest
([ ], ')') -> return [curr]
('"' : s, '"') -> loop (curr ++ [ch]) s ('"' : s, '"') -> loop (curr ++ [ch]) s
( s, '"') -> loop (curr ++ [ch]) ('"' : s) ( s, '"') -> loop (curr ++ [ch]) ('"' : s)
...@@ -578,17 +604,33 @@ substituteArgs ('`' : '`' : body) names args = ...@@ -578,17 +604,33 @@ substituteArgs ('`' : '`' : body) names args =
substituteArgs ('"' : body) names args = substituteArgs ('"' : body) names args =
'"' : start ++ substituteArgs rest names args '"' : start ++ substituteArgs rest names args
where (start, rest) = findUnescapedQuote body where (start, rest) = findUnescapedQuote body
substituteArgs ('\\' : '"' : body) names args =
'\\' : '"' : substituteArgs body names args
substituteArgs ('`' : '"' : body) names args = substituteArgs ('`' : '"' : body) names args =
'"' : substituteArgs (init start) names args '"' : substituteArgs (init start) names args
++ '"' : substituteArgs rest names args ++ '"' : substituteArgs rest names args
where (start, rest) = findUnescapedQuote body where (start, rest) = findUnescapedQuote body
substituteArgs body names args = substituteArgs body names args =
case findIndex isPresent names of case span isIdentChar body of
Nothing -> head body : substituteArgs (tail body) names args ([], _) -> head body : substituteArgs (tail body) names args
Just idx -> (ident, rest) ->
(args !! idx) ++ substituteArgs (drop nameLen body) names args case elemIndex ident names of
where nameLen = length $ names !! idx Nothing -> ident ++ substituteArgs rest names args
where isPresent a = isPrefixOf a body Just idx -> (args !! idx) ++ substituteArgs rest names args
defaultMacroArgs :: [Maybe String] -> [String] -> Alex [String]
defaultMacroArgs [] [] = return []
defaultMacroArgs [] _ = lexicalError "too many macro arguments given"
defaultMacroArgs defaults [] = do
if all isJust defaults
then return $ map fromJust defaults
else lexicalError "too few macro arguments given"
defaultMacroArgs (f : fs) (a : as) = do
let arg = if a == "" && isJust f
then fromJust f
else a
args <- defaultMacroArgs fs as
return $ arg : args
-- directives that must always be processed even if the current code block is -- directives that must always be processed even if the current code block is
-- being excluded; we have to process conditions so we can match them up with -- being excluded; we have to process conditions so we can match them up with
...@@ -608,8 +650,7 @@ handleDirective (posOrig, _, _, strOrig) len = do ...@@ -608,8 +650,7 @@ handleDirective (posOrig, _, _, strOrig) len = do
let dropUntilNewline = removeUntil "\n" tempInput 0 let dropUntilNewline = removeUntil "\n" tempInput 0
condStack <- gets lsCondStack condStack <- gets lsCondStack
if not (null condStack) if any (/= CurrentlyTrue) condStack
&& head condStack /= CurrentlyTrue
&& not (elem directive unskippableDirectives) && not (elem directive unskippableDirectives)
then alexMonadScan then alexMonadScan
else case directive of else case directive of
...@@ -711,12 +752,8 @@ handleDirective (posOrig, _, _, strOrig) len = do ...@@ -711,12 +752,8 @@ handleDirective (posOrig, _, _, strOrig) len = do
then return body then return body
else do else do
actualArgs <- takeMacroArguments actualArgs <- takeMacroArguments
if length formalArgs == length actualArgs defaultedArgs <- defaultMacroArgs (map snd formalArgs) actualArgs
then return $ substituteArgs body formalArgs actualArgs return $ substituteArgs body (map fst formalArgs) defaultedArgs
else lexicalError $
"different number of macro args: " ++
(show $ length formalArgs) ++ " vs. " ++
(show $ length actualArgs)
let size = length replacement let size = length replacement
(AlexPn f l c, _, [], str) <- alexGetInput (AlexPn f l c, _, [], str) <- alexGetInput
let pos = AlexPn (f - size) l (c - size) let pos = AlexPn (f - size) l (c - size)
...@@ -757,7 +794,7 @@ tok tokId (pos, _, _, input) len = do ...@@ -757,7 +794,7 @@ tok tokId (pos, _, _, input) len = do
let tokStr = take len input let tokStr = take len input
tokPos <- toTokPos pos tokPos <- toTokPos pos
condStack <- gets lsCondStack condStack <- gets lsCondStack
() <- if not (null condStack) && head condStack /= CurrentlyTrue () <- if any (/= CurrentlyTrue) condStack
then modify id then modify id
else modify (push $ Token tokId tokStr tokPos) else modify (push $ Token tokId tokStr tokPos)
alexMonadScan alexMonadScan
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment