Commit 104f9801 by Zachary Snow

support for string literals with macros

- adds support for using macros and macro arguments within the argument
  list to other macros
- fixes an issue where macros with arguments would have extraneous
  whitespace
- fixes handling of multiline string literals
parent 8f4e783f
...@@ -15,7 +15,7 @@ module Language.SystemVerilog.Parser.Preprocess ...@@ -15,7 +15,7 @@ module Language.SystemVerilog.Parser.Preprocess
import Control.Monad.Except import Control.Monad.Except
import Control.Monad.State import Control.Monad.State
import Data.Char (ord) import Data.Char (ord)
import Data.List (span, elemIndex, dropWhileEnd, splitAt, tails, isPrefixOf, findIndex) import Data.List (span, dropWhileEnd, splitAt, tails, isPrefixOf, findIndex)
import Data.Maybe (isJust, fromJust) import Data.Maybe (isJust, fromJust)
import System.Directory (findFile) import System.Directory (findFile)
import System.FilePath (dropFileName) import System.FilePath (dropFileName)
...@@ -35,6 +35,7 @@ data PP = PP ...@@ -35,6 +35,7 @@ data PP = PP
, ppEnv :: Env -- active macro definitions , ppEnv :: Env -- active macro definitions
, ppCondStack :: [Cond] -- if-else cascade state , ppCondStack :: [Cond] -- if-else cascade state
, ppIncludePaths :: [FilePath] -- folders to search for includes , ppIncludePaths :: [FilePath] -- folders to search for includes
, ppMacroStack :: [[(String, String)]] -- arguments for in-progress macro expansions
} deriving (Eq, Show) } deriving (Eq, Show)
-- keeps track of the state of an if-else cascade level -- keeps track of the state of an if-else cascade level
...@@ -51,7 +52,7 @@ preprocess includePaths env path = do ...@@ -51,7 +52,7 @@ preprocess includePaths env path = do
if path == "-" if path == "-"
then getContents then getContents
else loadFile path else loadFile path
let initialState = PP contents [] (Position path 1 1) path env [] includePaths let initialState = PP contents [] (Position path 1 1) path env [] includePaths []
result <- runExceptT $ execStateT preprocessInput initialState result <- runExceptT $ execStateT preprocessInput initialState
return $ case result of return $ case result of
Left msg -> Left msg Left msg -> Left msg
...@@ -124,6 +125,11 @@ getCondStack :: PPS [Cond] ...@@ -124,6 +125,11 @@ getCondStack :: PPS [Cond]
getCondStack = gets ppCondStack getCondStack = gets ppCondStack
setCondStack :: [Cond] -> PPS () setCondStack :: [Cond] -> PPS ()
setCondStack x = modify $ \s -> s { ppCondStack = x } setCondStack x = modify $ \s -> s { ppCondStack = x }
-- macro stack accessors
getMacroStack :: PPS [[(String, String)]]
getMacroStack = gets ppMacroStack
setMacroStack :: [[(String, String)]] -> PPS ()
setMacroStack x = modify $ \s -> s { ppMacroStack = x }
-- combined input and position accessors -- combined input and position accessors
setBuffer :: (String, Position) -> PPS () setBuffer :: (String, Position) -> PPS ()
setBuffer (x, p) = do setBuffer (x, p) = do
...@@ -254,6 +260,7 @@ takeMacroDefinition = do ...@@ -254,6 +260,7 @@ takeMacroDefinition = do
return (body, []) return (body, [])
else do else do
args <- takeMacroArguments args <- takeMacroArguments
dropSpaces
body <- takeUntilNewline body <- takeUntilNewline
argsWithDefaults <- mapM splitArg args argsWithDefaults <- mapM splitArg args
if null args if null args
...@@ -282,7 +289,7 @@ takeMacroArguments = do ...@@ -282,7 +289,7 @@ takeMacroArguments = do
dropWhitespace dropWhitespace
leadCh <- takeChar leadCh <- takeChar
if leadCh == '(' if leadCh == '('
then argLoop then argLoop >>= mapM preprocessString
else lexicalError $ "expected begining of macro arguments, but found " else lexicalError $ "expected begining of macro arguments, but found "
++ show leadCh ++ show leadCh
where where
...@@ -318,39 +325,6 @@ takeMacroArguments = do ...@@ -318,39 +325,6 @@ takeMacroArguments = do
( s,'\n') -> loop (curr ++ [' ']) s ( s,'\n') -> loop (curr ++ [' ']) s
( s, _ ) -> loop (curr ++ [ch ]) s ( s, _ ) -> loop (curr ++ [ch ]) s
findUnescapedQuote :: String -> (String, String)
findUnescapedQuote [] = ([], [])
findUnescapedQuote ('`' : '\\' : '`' : '"' : rest) = ('\\' : '"' : start, end)
where (start, end) = findUnescapedQuote rest
findUnescapedQuote ('\\' : '"' : rest) = ('\\' : '"' : start, end)
where (start, end) = findUnescapedQuote rest
findUnescapedQuote ('"' : rest) = ("\"", rest)
findUnescapedQuote ('`' : '"' : rest) = ("\"", rest)
findUnescapedQuote (ch : rest) = (ch : start, end)
where (start, end) = findUnescapedQuote rest
-- substitute in the arguments for a macro expansion
substituteArgs :: String -> [String] -> [String] -> String
substituteArgs "" _ _ = ""
substituteArgs ('`' : '`' : body) names args =
substituteArgs body names args
substituteArgs ('"' : body) names args =
'"' : start ++ substituteArgs rest names args
where (start, rest) = findUnescapedQuote body
substituteArgs ('\\' : '"' : body) names args =
'\\' : '"' : substituteArgs body names args
substituteArgs ('`' : '"' : body) names args =
'"' : substituteArgs (init start) names args
++ '"' : substituteArgs rest names args
where (start, rest) = findUnescapedQuote body
substituteArgs body names args =
case span isIdentChar body of
([], _) -> head body : substituteArgs (tail body) names args
(ident, rest) ->
case elemIndex ident names of
Nothing -> ident ++ substituteArgs rest names args
Just idx -> (args !! idx) ++ substituteArgs rest names args
defaultMacroArgs :: [Maybe String] -> [String] -> PPS [String] defaultMacroArgs :: [Maybe String] -> [String] -> PPS [String]
defaultMacroArgs [] [] = return [] defaultMacroArgs [] [] = return []
defaultMacroArgs [] _ = lexicalError "too many macro arguments given" defaultMacroArgs [] _ = lexicalError "too many macro arguments given"
...@@ -435,25 +409,154 @@ directives = ...@@ -435,25 +409,154 @@ directives =
preprocessInput :: PPS () preprocessInput :: PPS ()
preprocessInput = do preprocessInput = do
str <- getInput str <- getInput
macroStack <- getMacroStack
case str of case str of
'/' : '/' : _ -> removeThrough "\n" '/' : '/' : _ -> removeThrough "\n"
'/' : '*' : _ -> removeThrough "*/" '/' : '*' : _ -> removeThrough "*/"
'`' : _ -> handleDirective '`' : '"' : _ -> handleBacktickString
ch : chs -> do '"' : _ -> handleString
pos <- getPosition '`' : '`' : _ -> do
advancePosition ch if null macroStack
setInput chs then do
condStack <- getCondStack consume
if any (/= CurrentlyTrue) condStack consume
then return () else do
else pushChar ch pos '`' <- takeChar
'`' <- takeChar
return ()
'`' : _ -> handleDirective False
_ : _ -> consumeWithSubstitution
[] -> return () [] -> return ()
if str == [] if str == []
then return () then return ()
else preprocessInput else preprocessInput
handleDirective :: PPS () -- if we are expanding a macro, and the leading tokens form an identifier, then
handleDirective = do -- attempt to replace that identifier with the arguments of this macro, if
-- applicable; otherwise, just consume the top character
consumeWithSubstitution :: PPS ()
consumeWithSubstitution = do
str <- getInput
macroStack <- getMacroStack
if null macroStack then
consume
else do
let (ident, rest) = span isIdentChar str
if null ident then
consume
else do
pos <- getPosition
let args = head macroStack
let chars = case lookup ident args of
Nothing -> ident
Just val -> val
pushChars chars pos
advancePositions ident
setInput rest
-- consume takes the lead input character and pushes it into the output,
-- advancing the position state and removing the lead character from the input
consume :: PPS ()
consume = do
ch : chs <- getInput
pos <- getPosition
advancePosition ch
setInput chs
pushChar ch pos
-- preprocess a leading string literal; this routine is largely necessary to
-- avoid doing any macro or directive related manipulations within standard
-- string literals; it also handles escaped newlines in the string
handleString :: PPS ()
handleString = do
consume
loop
where
-- processes the remainder of a standard string literal
loop :: PPS ()
loop = do
input <- getInput
case input of
'"' : _ -> do
consume
-- end of loop!
'\\' : '\n' : _ -> do
'\\' <- takeChar
'\n' <- takeChar
loop
'\\' : '\\' : _ -> do
consume
consume
loop
'\\' : '"' : _ -> do
consume
consume
loop
_ : _ -> do
consume
loop
[] -> lexicalError "unterminated string literal"
-- preprocess a "backtick string", which begins and ends with a backtick
-- followed by a slash (`"), and withing which macros can be invoked as normal;
-- otherwise, normal string literal rules apply, except that unescaped quotes
-- are forbidden, and backticks must be escaped using a backslash to avoid being
-- interpreted as a macro or marking the end of a string
handleBacktickString :: PPS ()
handleBacktickString = do
'`' <- takeChar
consume
loop
where
-- processes the remainder of a leading backtick string, up to and
-- including the ending `"
loop :: PPS ()
loop = do
input <- getInput
macroStack <- getMacroStack
case input of
'`' : '"' : _ -> do
'`' <- takeChar
consume -- ending quote
-- end of loop!
'\\' : '`' : _ -> do
'\\' <- takeChar
consume -- now un-escaped backtick
loop
'\\' : '\\' : _ -> do
consume
consume
loop
'\\' : '"' : _ -> do
consume
consume
loop
'\\' : '\n' : _ -> do
'\\' <- takeChar
'\n' <- takeChar
loop
'`' : '\\' : '`' : '"' : _ -> do
'`' <- takeChar
consume
'`' <- takeChar
consume
if null macroStack
then lexicalError "`\\`\" is not allowed outside of macros"
else loop
'`' : _ -> do
handleDirective True
loop
'"' : _ ->
if null macroStack
then lexicalError "unescaped quote in backtick string"
else consume -- end of loop!
_ : _ -> do
consumeWithSubstitution
loop
[] -> lexicalError "unterminated backtick string"
handleDirective :: Bool -> PPS ()
handleDirective macrosOnly = do
directivePos <- getPosition directivePos <- getPosition
'`' <- takeChar '`' <- takeChar
directive <- takeIdentifier directive <- takeIdentifier
...@@ -461,14 +564,15 @@ handleDirective = do ...@@ -461,14 +564,15 @@ handleDirective = do
-- helper for directives which are not operated on -- helper for directives which are not operated on
let passThrough = do let passThrough = do
pushChar '`' directivePos pushChar '`' directivePos
_ <- mapM (flip pushChar directivePos) directive pushChars directive directivePos
return ()
env <- getEnv env <- getEnv
condStack <- getCondStack condStack <- getCondStack
if any (/= CurrentlyTrue) condStack if any (/= CurrentlyTrue) condStack
&& not (elem directive unskippableDirectives) && not (elem directive unskippableDirectives) then
then return () return ()
else if macrosOnly && elem directive directives then
lexicalError "compiler directives are forbidden inside strings"
else case directive of else case directive of
"timescale" -> removeThrough "\n" "timescale" -> removeThrough "\n"
...@@ -585,22 +689,27 @@ handleDirective = do ...@@ -585,22 +689,27 @@ handleDirective = do
case Map.lookup directive env of case Map.lookup directive env of
Nothing -> lexicalError $ "Undefined macro: " ++ directive Nothing -> lexicalError $ "Undefined macro: " ++ directive
Just (body, formalArgs) -> do Just (body, formalArgs) -> do
replacement <- if null formalArgs (names, args) <- if null formalArgs
then return body then return ([], [])
else do else do
actualArgs <- takeMacroArguments actualArgs <- takeMacroArguments
defaultedArgs <- defaultMacroArgs (map snd formalArgs) actualArgs defaultedArgs <- defaultMacroArgs (map snd formalArgs) actualArgs
return $ substituteArgs body (map fst formalArgs) defaultedArgs return (map fst formalArgs, defaultedArgs)
-- save our current state -- save our current state
currFile <- getFilePath currFile <- getFilePath
macroStack <- getMacroStack
bufFollow <- getBuffer bufFollow <- getBuffer
-- lex the macro expansion, preserving the file and line -- lex the macro expansion, preserving the file and line
let Position _ l c = snd bufFollow let Position _ l c = snd bufFollow
let loc = "macro expansion of " ++ directive ++ " at " ++ currFile let loc = "macro expansion of " ++ directive ++ " at " ++ currFile
let pos = Position loc l (c - length directive - 1) let pos = Position loc l (c - length directive - 1)
setBuffer (replacement, pos) setMacroStack $ (zip names args) : macroStack
setBuffer (body, pos)
preprocessInput preprocessInput
"" <- getInput
setMacroStack $ error $ show $ (zip names args) : macroStack
-- return to the rest of the input -- return to the rest of the input
setMacroStack macroStack
setBuffer bufFollow setBuffer bufFollow
-- inserts the given string into the output at the given position -- inserts the given string into the output at the given position
...@@ -633,6 +742,26 @@ lineLookahead = do ...@@ -633,6 +742,26 @@ lineLookahead = do
setOutput outputOrig setOutput outputOrig
setCondStack condStackOrig setCondStack condStackOrig
-- run the given string through the current preprocessor state, but out of band
preprocessString :: String -> PPS String
preprocessString str = do
-- save the state
outputOrig <- gets ppOutput
condStackOrig <- getCondStack
bufferOrig <- getBuffer
-- process the line
setOutput []
setCondStack []
setInput str
preprocessInput
outputAfter <- getOutput
-- restore the previous state
setBuffer bufferOrig
setOutput outputOrig
setCondStack condStackOrig
-- get the result characters
return $ reverse $ map fst outputAfter
-- update the position in the preprocessor state according to the movement of -- update the position in the preprocessor state according to the movement of
-- the given character -- the given character
advancePosition :: Char -> PPS () advancePosition :: Char -> PPS ()
...@@ -652,8 +781,17 @@ advancePositions str = do ...@@ -652,8 +781,17 @@ advancePositions str = do
-- adds a character (and its position) to the output state -- adds a character (and its position) to the output state
pushChar :: Char -> Position -> PPS () pushChar :: Char -> Position -> PPS ()
pushChar c p = do pushChar c p = do
output <- getOutput condStack <- getCondStack
setOutput $ (c, p) : output if any (/= CurrentlyTrue) condStack
then return ()
else do
output <- getOutput
setOutput $ (c, p) : output
-- adds a sequence of characters all at the same given position
pushChars :: String -> Position -> PPS ()
pushChars s p = do
_ <- mapM (flip pushChar p) s
return ()
-- search for a pattern in the input and remove remove characters up to and -- search for a pattern in the input and remove remove characters up to and
-- including the first occurrence of the pattern -- including the first occurrence of the pattern
......
`define FOO BAR
`define BAR(e) prefix``e
`define BAZ `"FOO`\`"`FOO`"
module top;
initial begin
$display("FOO");
$display("`FOO");
$display(`"`FOO`");
$display(`"\`FOO`");
$display(`"\\`FOO`");
$display(`"\\\`FOO`");
$display(`"\"FOO`");
$display(`"\"`FOO`");
$display(`"\"FOO\"`");
$display(`"\"`FOO\"`");
$display(`"FOO`");
$display(`"FOO`FOO`");
$display(`"`BAR(LOL)`");
$display(`"\`BAR(LOL)`");
$display(`"\\`BAR(LOL)`");
$display(`"\\\`BAR(LOL)`");
$display(`"`BAR(`FOO)`");
$display(`"\`BAR(`FOO)`");
$display(`"\\`BAR(`FOO)`");
$display(`"\\\`BAR(`FOO)`");
$display(`"`BAR(s`FOO)`");
$display(`"\`BAR(s`FOO)`");
$display(`"\\`BAR(s`FOO)`");
$display(`"\\\`BAR(s`FOO)`");
$display(`BAZ);
`ifdef DNE
$display(`DNE);
$display(`"`DNE`");
`define
`line
`foo
`endif
$display("TEST END");
$display("TEST\
END");
$display(`"TEST\
END`");
$display(`"TEST\`FOO\
END`");
$display(`"TEST\"`FOO\
END`");
$display(`"TEST\"`FOO\
END`FOO`");
end
endmodule
module top;
initial begin
$display("FOO");
$display("`FOO");
$display("BAR");
$display("`FOO");
$display("\\BAR");
$display("\\`FOO");
$display("\"FOO");
$display("\"BAR");
$display("\"FOO\"");
$display("\"BAR\"");
$display("FOO");
$display("FOOBAR");
$display("prefixLOL");
$display("`BAR(LOL)");
$display("\\prefixLOL");
$display("\\`BAR(LOL)");
$display("prefixBAR");
$display("`BAR(BAR)");
$display("\\prefixBAR");
$display("\\`BAR(BAR)");
$display("prefixsBAR");
$display("`BAR(sBAR)");
$display("\\prefixsBAR");
$display("\\`BAR(sBAR)");
$display("FOO\"BAR");
$display("TEST END");
$display("TEST END");
$display("TEST END");
$display("TEST`FOO END");
$display("TEST\"BAR END");
$display("TEST\"BAR ENDBAR");
end
endmodule
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment