Commit d578aee5 by Zachary Snow

conflate the preprocessor and lexer

This should make it much easier to add support for ``, `", macros with
arguments, etc., in the future.
parent e69895af
......@@ -6,12 +6,11 @@ module Language.SystemVerilog.Parser
) where
import Language.SystemVerilog.AST
import Language.SystemVerilog.Parser.Lex
import Language.SystemVerilog.Parser.Parse
import Language.SystemVerilog.Parser.Preprocess
-- parses a file given a table of predefined macros and the file name
parseFile :: [String] -> [(String, String)] -> FilePath -> IO AST
parseFile includePaths env file =
loadFile file >>=
preprocess includePaths env >>=
lexFile includePaths env file >>=
return . descriptions
{
module Language.SystemVerilog.Parser.Lex (alexScanTokens) where
{- sv2v
- Author: Zachary Snow <zach@zachjs.com>
- Original Lexer Author: Tom Hawkins <tomahawkins@gmail.com>
-
- Combined source lexing and preprocessing
-
- These procedures are combined so that we can simultaneously process macros in
- a sane way (something analogous to character-by-character) and have our
- lexemes properly tagged with source file positions.
-
- The scariest piece of this module is the use of `unsafePerformIO`. We want to
- be able to search for and read files whenever we see an include directive.
- Trying to thread the IO Monad through alex's interface would be very
- convoluted. The operations performed are not effectful, and are type safe.
-}
{-# OPTIONS_GHC -fno-warn-unused-imports #-}
-- The above pragma gets rid of annoying warning caused by alex 3.2.4. This has
-- been fixed on their development branch, so this can be removed once they roll
-- a new release. (no new release as of 3/29/2018)
module Language.SystemVerilog.Parser.Lex (lexFile) where
import System.FilePath (dropFileName)
import System.Directory (findFile)
import System.IO.Unsafe (unsafePerformIO)
import qualified Data.Map.Strict as Map
import Language.SystemVerilog.Parser.Tokens
}
......@@ -255,28 +279,49 @@ tokens :-
"<<<=" { tok Sym_lt_lt_lt_eq }
">>>=" { tok Sym_gt_gt_gt_eq }
"`include" { includeFile }
@directive { handleDirective }
@commentLine { removeUntil "\n" }
@commentBlock { removeUntil "*/" }
@directive { tok Spe_Directive }
@newline { tok Spe_Newline }
@whitespace ;
$white ;
. { tok Unknown }
{
type AlexUserState = [Token]
data Cond
= CurrentlyTrue
| PreviouslyTrue
| NeverTrue
deriving (Eq, Show)
alexInitUserState :: AlexUserState
alexInitUserState = []
data AlexUserState = LS
{ lsToks :: [Token]
, lsCurrFile :: FilePath
, lsEnv :: Map.Map String String
, lsCondStack :: [Cond]
, lsIncludePaths :: [FilePath]
} deriving (Eq, Show)
alexScanTokens :: String -> [Token]
alexScanTokens str =
let result = runAlex str $ alexMonadScan >> get
in case result of
Left msg -> error $ "Lex Error: " ++ msg
Right tokens -> tokens
alexInitUserState :: AlexUserState
alexInitUserState = LS [] "" Map.empty [] []
lexFile :: [String] -> [(String, String)] -> FilePath -> IO [Token]
lexFile includePaths env path = do
str <- readFile path
let result = runAlex str $ setEnv >> alexMonadScan >> get
return $ case result of
Left msg -> error $ "Lexical Error: " ++ msg
Right tokens -> lsToks tokens
where
initialEnv = Map.fromList env
setEnv = modify $ \s -> s
{ lsEnv = initialEnv
, lsIncludePaths = includePaths
, lsCurrFile = path
}
get :: Alex AlexUserState
get = Alex $ \s -> Right (s, alex_ust s)
......@@ -289,11 +334,195 @@ modify f = Alex func
where func s = Right (s { alex_ust = new }, ())
where new = f (alex_ust s)
getCurrentFile :: Alex String
getCurrentFile = gets lsCurrFile
setCurrentFile :: String -> Alex ()
setCurrentFile x = modify $ \s -> s { lsCurrFile = x }
alexEOF :: Alex ()
alexEOF = return ()
type Action = AlexInput -> Int -> Alex ()
breakAfter :: (a -> Bool) -> [a] -> ([a], [a])
breakAfter f l = (a ++ [b], bs)
where (a, b : bs) = break f l
includeSearch :: FilePath -> Alex FilePath
includeSearch file = do
base <- getCurrentFile
includePaths <- gets lsIncludePaths
let directories = dropFileName base : includePaths
let result = unsafePerformIO $ findFile directories file
case result of
Just path -> return path
Nothing ->
alexError
$ "Could not find file " ++ file ++ " included from " ++ base
loadFile :: String -> Alex String
loadFile s = return $ unsafePerformIO $ readFile s
includeFile :: Action
includeFile (AlexPn f l c, _, _, str) len = do
let (dropped , rest1) = breakAfter (== '"') (drop len str)
let (filename, rest2) = break (== '"') rest1
let rest3 = if null rest2 then [] else tail rest2
let offset = len + length dropped + length filename + 1
let inputFollow = (AlexPn (f + offset) l (c + offset), ' ', [], rest3)
fileFollow <- getCurrentFile
-- process the the included file
path <- includeSearch filename
content <- loadFile path
let inputIncluded = (AlexPn 0 0 0, ' ', [], content)
setCurrentFile path
alexSetInput inputIncluded
alexMonadScan
-- resume processing the original file
setCurrentFile fileFollow
alexSetInput inputFollow
alexMonadScan
unskippableDirectives :: [String]
unskippableDirectives = ["else", "elsif", "endif", "ifdef", "ifndef"]
isIdentChar :: Char -> Bool
isIdentChar ch =
('a' <= ch && ch <= 'z') ||
('A' <= ch && ch <= 'Z') ||
('0' <= ch && ch <= '9') ||
(ch == '_') || (ch == '$')
takeString :: Alex String
takeString = do
(AlexPn f l c, _, _, str) <- alexGetInput
let (x, rest) = span isIdentChar str
let len = length x
alexSetInput (AlexPn (f+len) l (c+len), ' ', [], rest)
return x
getCurrentPos :: Alex Position
getCurrentPos = do
(AlexPn _ l c, _, _, _) <- alexGetInput
file <- getCurrentFile
return $ Position file l c
dropSpace :: Alex ()
dropSpace = do
(AlexPn f l c, _, _, str) <- alexGetInput
case str of
[] -> return ()
' ' : rest -> alexSetInput (AlexPn (f+1) l (c+1), ' ', [], rest)
ch : _ -> do
pos <- getCurrentPos
alexError $ "dropSpace encountered bad char: " ++ show ch ++
" at " ++ show pos
-- read tokens after the name until the first (un-escaped) newline
takeUntilNewline :: Alex String
takeUntilNewline = do
(AlexPn f l c, _, _, str) <- alexGetInput
case str of
[] -> return ""
'\n' : _ -> do
return ""
'\\' : '\n' : rest -> do
alexSetInput (AlexPn (f+2) (l+1) 0, ' ', [], rest)
takeUntilNewline >>= return . (' ' :)
ch : rest -> do
alexSetInput (AlexPn (f+1) l (c+1), ' ', [], rest)
takeUntilNewline >>= return . (ch :)
handleDirective :: Action
handleDirective (AlexPn fOrig lOrig cOrig, _, _, strOrig) len = do
let directive = tail $ take len strOrig
let newPos = AlexPn (fOrig + len) lOrig (cOrig + len)
alexSetInput (newPos, ' ', [], drop len strOrig)
env <- gets lsEnv
tempInput <- alexGetInput
let dropUntilNewline = removeUntil "\n" tempInput 0
condStack <- gets lsCondStack
if not (null condStack)
&& head condStack /= CurrentlyTrue
&& not (elem directive unskippableDirectives)
then alexMonadScan
else case directive of
"default_nettype" -> dropUntilNewline
"timescale" -> dropUntilNewline
"ifdef" -> do
dropSpace
name <- takeString
let newCond = if Map.member name env
then CurrentlyTrue
else NeverTrue
modify $ \s -> s { lsCondStack = newCond : condStack }
alexMonadScan
"ifndef" -> do
dropSpace
name <- takeString
let newCond = if Map.notMember name env
then CurrentlyTrue
else NeverTrue
modify $ \s -> s { lsCondStack = newCond : condStack }
alexMonadScan
"else" -> do
let newCond = if head condStack == NeverTrue
then CurrentlyTrue
else NeverTrue
modify $ \s -> s { lsCondStack = newCond : tail condStack }
alexMonadScan
"elsif" -> do
dropSpace
name <- takeString
let currCond = head condStack
let newCond =
if currCond /= NeverTrue then
PreviouslyTrue
else if Map.member name env then
CurrentlyTrue
else
NeverTrue
modify $ \s -> s { lsCondStack = newCond : tail condStack }
alexMonadScan
"endif" -> do
modify $ \s -> s { lsCondStack = tail condStack }
alexMonadScan
"define" -> do
-- TODO: We don't yet support macros with arguments!
dropSpace
name <- takeString
defn <- takeUntilNewline
modify $ \s -> s { lsEnv = Map.insert name defn env }
alexMonadScan
"undef" -> do
dropSpace
name <- takeString
modify $ \s -> s { lsEnv = Map.delete name env }
alexMonadScan
"undefineall" -> do
modify $ \s -> s { lsEnv = Map.empty }
alexMonadScan
_ -> do
case Map.lookup directive env of
Nothing -> do
pos <- getCurrentPos >>= return . show
alexError $ "Undefined macro: " ++ directive ++ " at " ++ pos
Just replacement -> do
let size = length replacement
-- TODO: How should we track the file position when we
-- substitute in a macro?
(AlexPn f l c, ' ', [], str) <- alexGetInput
let pos = AlexPn (f - size) l (c - size)
alexSetInput (pos, ' ', [], replacement ++ str)
alexMonadScan
-- remove characters from the input until the pattern is reached
removeUntil :: String -> Action
......@@ -306,11 +535,11 @@ removeUntil pattern _ _ = loop
let found = (null str && wantNewline)
|| pattern == take patternLen str
let nextPos = if head str == '\n'
then AlexPn f (l+1) 0
else AlexPn f l (c+1)
then AlexPn (f+1) (l+1) 0
else AlexPn (f+1) l (c+1)
let afterPos = if wantNewline
then AlexPn f (l+1) 0
else AlexPn f l (c + patternLen)
then AlexPn (f+1) (l+1) 0
else AlexPn (f+1) l (c + patternLen)
let (newPos, newStr) = if found
then (afterPos, drop patternLen str)
else (nextPos, drop 1 str)
......@@ -320,10 +549,14 @@ removeUntil pattern _ _ = loop
else loop
tok :: TokenName -> Action
tok tokId ((AlexPn _ l c), _, _, input) len =
modify (++ [t]) >> alexMonadScan
where
tokStr = take len input
tokPos = Position "" l c
t = Token tokId tokStr tokPos
tok tokId ((AlexPn _ l c), _, _, input) len = do
currFile <- gets lsCurrFile
let tokStr = take len input
let tokPos = Position currFile l c
condStack <- gets lsCondStack
() <- if not (null condStack) && head condStack /= CurrentlyTrue
then modify id
else modify (push $ Token tokId tokStr tokPos)
alexMonadScan
where push t s = s { lsToks = (lsToks s) ++ [t] }
}
{- sv2v
- Author: Zachary Snow <zach@zachjs.com>
-
- Source file loading and preprocessing
-}
module Language.SystemVerilog.Parser.Preprocess
( loadFile
, preprocess
, PP (..)
) where
import Control.Monad.State
import Data.List (intercalate)
import qualified Data.Map.Strict as Map
import System.FilePath (dropFileName)
import System.Directory (findFile)
import Language.SystemVerilog.Parser.Lex
import Language.SystemVerilog.Parser.Tokens
isNewline :: Token -> Bool
isNewline (Token t _ _) = t == Spe_Newline
unskippableDirectives :: [String]
unskippableDirectives = ["else", "elsif", "endif", "ifdef", "ifndef"]
-- a bit of a hack to allow things like: `WIDTH'b0
combineNumbers :: [Token] -> [Token]
combineNumbers (Token Lit_number size pos : Token Lit_number ('\'' : num) _ : tokens) =
Token Lit_number (size ++ "'" ++ num) pos : combineNumbers tokens
combineNumbers (token : tokens) = token : combineNumbers tokens
combineNumbers [] = []
includeSearch :: FilePath -> FilePath -> (StateT PP IO) FilePath
includeSearch base file = do
includePaths <- gets ppIncludePaths
let directories = dropFileName base : includePaths
result <- lift $ findFile directories file
case result of
Just path -> return path
Nothing ->
error $ "Could not find file " ++ file ++ " included from " ++ base
data Cond
= CurrentlyTrue
| PreviouslyTrue
| NeverTrue
deriving (Eq, Show)
data PP = PP
{ ppEnv :: Map.Map String [Token]
, ppCondStack :: [Cond]
, ppIncludePaths :: [FilePath]
} deriving (Eq, Show)
pp :: [Token] -> (StateT PP IO) [Token]
pp [] = do
condStack <- gets ppCondStack
if null condStack
then return []
else error $ "have unfinished " ++ (show $ length condStack)
++ " conditional directive(s)"
pp (Token Spe_Directive str pos : tokens) = do
let directive = tail str
condStack <- gets ppCondStack
env <- gets ppEnv
if not (null condStack)
&& head condStack /= CurrentlyTrue
&& not (elem directive unskippableDirectives)
then pp tokens
else case directive of
"default_nettype" -> do
let str' = str ++ " " ++ (tokenString $ head tokens)
let token' = Token Spe_Directive str' pos
tokens' <- pp $ tail tokens
return $ token' : tokens'
"timescale" -> do
-- timescale must appear alone on a line
-- read tokens until the first (un-escaped) newline
let (defn, rest) = break isNewline $ tokens
let str' = str ++ " " ++ (intercalate " " $ map tokenString defn)
let token' = Token Spe_Directive str' pos
tokens' <- pp rest
return $ token' : tokens'
"include" -> do
let file = init $ tail $ tokenString $ head tokens
let Position basePath _ _ = pos
filePath <- includeSearch basePath file
includedTokens <- lift $ loadFile filePath
pp $ includedTokens ++ tail tokens
"ifdef" -> do
let name = tokenString $ head tokens
newCond <- return $
if Map.member name env then CurrentlyTrue else NeverTrue
modify $ \s -> s { ppCondStack = newCond : condStack }
pp $ tail tokens
"ifndef" -> do
let name = tokenString $ head tokens
newCond <- return $
if Map.notMember name env then CurrentlyTrue else NeverTrue
modify $ \s -> s { ppCondStack = newCond : condStack }
pp $ tail tokens
"else" -> do
newCond <- return $
if head condStack == NeverTrue then CurrentlyTrue else NeverTrue
modify $ \s -> s { ppCondStack = newCond : tail condStack }
pp tokens
"elsif" -> do
let name = tokenString $ head tokens
let currCond = head condStack
newCond <- return $
if currCond /= NeverTrue then
PreviouslyTrue
else if Map.member name env then
CurrentlyTrue
else
NeverTrue
modify $ \s -> s { ppCondStack = newCond : tail condStack }
pp $ tail tokens
"endif" -> do
modify $ \s -> s { ppCondStack = tail condStack }
pp tokens
"define" -> do
-- read tokens after the name until the first (un-escaped) newline
let (defn, rest) = break isNewline $ tail tokens
-- macro definitions can contain macros, but no conditionals, so we
-- temporarily drop the condition stack while we preprocess it
modify' $ \s -> s { ppCondStack = [] }
defn' <- pp defn
modify' $ \s -> s { ppCondStack = condStack }
let env' = Map.insert (tokenString $ head tokens) defn' env
modify $ \s -> s { ppEnv = env' }
pp rest -- drop the macro, process the rest of the tokens
"undef" -> do
let name = tokenString $ head tokens
modify $ \s -> s { ppEnv = Map.delete name env }
pp $ tail tokens
"undefineall" -> do
modify $ \s -> s { ppEnv = Map.empty }
pp tokens
_ -> do
case Map.lookup directive env of
Nothing -> do
error $ "Undefined macro: " ++ directive ++ " at " ++ (show pos)
Just replacement -> do
-- TODO: How should we track the position of tokens that are
-- substituted in? Using only one position or the other
-- doesn't tell the full story.
tokens' <- pp tokens
return $ replacement ++ tokens'
pp (Token Spe_Newline _ _ : tokens) = pp tokens
pp (token : tokens) = do
condStack <- gets ppCondStack
tokens' <- pp tokens
if not (null condStack) && head condStack /= CurrentlyTrue
then return tokens'
else return $ token : tokens'
-- loads and lexes the file at the given path
loadFile :: FilePath -> IO [Token]
loadFile file = do
content <- readFile file
let tokens = alexScanTokens content
return $ map relocate tokens
where
relocate :: Token -> Token
relocate (Token t s (Position _ l c)) = Token t s $ Position file l c
preprocess :: [String] -> [(String, String)] -> [Token] -> IO [Token]
preprocess includePaths env tokens = do
let initialEnv = Map.map alexScanTokens $ Map.fromList env
let initialState = PP initialEnv [] includePaths
res <- evalStateT (pp tokens) initialState
return $ combineNumbers res
......@@ -335,6 +335,5 @@ data TokenName
| Sym_lt_lt_lt_eq
| Sym_gt_gt_gt_eq
| Spe_Directive
| Spe_Newline
| Unknown
deriving (Show, Eq)
......@@ -46,7 +46,6 @@ executable sv2v
Language.SystemVerilog.Parser.Lex
Language.SystemVerilog.Parser.Parse
Language.SystemVerilog.Parser.ParseDecl
Language.SystemVerilog.Parser.Preprocess
Language.SystemVerilog.Parser.Tokens
-- Conversion modules
Convert
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment