Commit e69895af by Zachary Snow

initial setup for combining pre-processor and lexer

parent 0d9ed3e1
...@@ -4,7 +4,7 @@ module Language.SystemVerilog.Parser.Lex (alexScanTokens) where ...@@ -4,7 +4,7 @@ module Language.SystemVerilog.Parser.Lex (alexScanTokens) where
import Language.SystemVerilog.Parser.Tokens import Language.SystemVerilog.Parser.Tokens
} }
%wrapper "posn" %wrapper "monadUserState"
-- Numbers -- Numbers
...@@ -42,13 +42,13 @@ $decimalDigit = [0-9] ...@@ -42,13 +42,13 @@ $decimalDigit = [0-9]
@unbasedUnsizedLiteral = "'" ( 0 | 1 | x | X | z | Z ) @unbasedUnsizedLiteral = "'" ( 0 | 1 | x | X | z | Z )
@number @number
= @unsignedNumber = @unsignedNumber
| @decimalNumber | @decimalNumber
| @octalNumber | @octalNumber
| @binaryNumber | @binaryNumber
| @hexNumber | @hexNumber
| @unbasedUnsizedLiteral | @unbasedUnsizedLiteral
| @realNumber | @realNumber
-- Strings -- Strings
...@@ -62,9 +62,8 @@ $decimalDigit = [0-9] ...@@ -62,9 +62,8 @@ $decimalDigit = [0-9]
-- Comments -- Comments
@commentBegin = "/*" @commentBlock = "/*"
@commentEnd = "*/" | "**/" @commentLine = "//"
@comment = "//" [^\n]* | "/**/"
-- Directives -- Directives
...@@ -78,195 +77,253 @@ $decimalDigit = [0-9] ...@@ -78,195 +77,253 @@ $decimalDigit = [0-9]
tokens :- tokens :-
"always" { tok KW_always } "always" { tok KW_always }
"always_comb" { tok KW_always_comb } "always_comb" { tok KW_always_comb }
"always_ff" { tok KW_always_ff } "always_ff" { tok KW_always_ff }
"always_latch" { tok KW_always_latch } "always_latch" { tok KW_always_latch }
"and" { tok KW_and } "and" { tok KW_and }
"assign" { tok KW_assign } "assign" { tok KW_assign }
"automatic" { tok KW_automatic } "automatic" { tok KW_automatic }
"begin" { tok KW_begin } "begin" { tok KW_begin }
"bit" { tok KW_bit } "bit" { tok KW_bit }
"buf" { tok KW_buf } "buf" { tok KW_buf }
"byte" { tok KW_byte } "byte" { tok KW_byte }
"case" { tok KW_case } "case" { tok KW_case }
"casex" { tok KW_casex } "casex" { tok KW_casex }
"casez" { tok KW_casez } "casez" { tok KW_casez }
"default" { tok KW_default } "default" { tok KW_default }
"defparam" { tok KW_defparam } "defparam" { tok KW_defparam }
"do" { tok KW_do } "do" { tok KW_do }
"else" { tok KW_else } "else" { tok KW_else }
"end" { tok KW_end } "end" { tok KW_end }
"endcase" { tok KW_endcase } "endcase" { tok KW_endcase }
"endfunction" { tok KW_endfunction } "endfunction" { tok KW_endfunction }
"endgenerate" { tok KW_endgenerate } "endgenerate" { tok KW_endgenerate }
"endinterface" { tok KW_endinterface } "endinterface" { tok KW_endinterface }
"endmodule" { tok KW_endmodule } "endmodule" { tok KW_endmodule }
"endtask" { tok KW_endtask } "endtask" { tok KW_endtask }
"enum" { tok KW_enum } "enum" { tok KW_enum }
"extern" { tok KW_extern } "extern" { tok KW_extern }
"for" { tok KW_for } "for" { tok KW_for }
"forever" { tok KW_forever } "forever" { tok KW_forever }
"function" { tok KW_function } "function" { tok KW_function }
"generate" { tok KW_generate } "generate" { tok KW_generate }
"genvar" { tok KW_genvar } "genvar" { tok KW_genvar }
"if" { tok KW_if } "if" { tok KW_if }
"initial" { tok KW_initial } "initial" { tok KW_initial }
"inout" { tok KW_inout } "inout" { tok KW_inout }
"input" { tok KW_input } "input" { tok KW_input }
"int" { tok KW_int } "int" { tok KW_int }
"integer" { tok KW_integer } "integer" { tok KW_integer }
"interface" { tok KW_interface } "interface" { tok KW_interface }
"localparam" { tok KW_localparam } "localparam" { tok KW_localparam }
"logic" { tok KW_logic } "logic" { tok KW_logic }
"longint" { tok KW_longint } "longint" { tok KW_longint }
"modport" { tok KW_modport } "modport" { tok KW_modport }
"module" { tok KW_module } "module" { tok KW_module }
"nand" { tok KW_nand } "nand" { tok KW_nand }
"negedge" { tok KW_negedge } "negedge" { tok KW_negedge }
"nor" { tok KW_nor } "nor" { tok KW_nor }
"not" { tok KW_not } "not" { tok KW_not }
"or" { tok KW_or } "or" { tok KW_or }
"output" { tok KW_output } "output" { tok KW_output }
"packed" { tok KW_packed } "packed" { tok KW_packed }
"parameter" { tok KW_parameter } "parameter" { tok KW_parameter }
"posedge" { tok KW_posedge } "posedge" { tok KW_posedge }
"real" { tok KW_real } "real" { tok KW_real }
"realtime" { tok KW_realtime } "realtime" { tok KW_realtime }
"reg" { tok KW_reg } "reg" { tok KW_reg }
"repeat" { tok KW_repeat } "repeat" { tok KW_repeat }
"return" { tok KW_return } "return" { tok KW_return }
"shortint" { tok KW_shortint } "shortint" { tok KW_shortint }
"shortreal" { tok KW_shortreal } "shortreal" { tok KW_shortreal }
"signed" { tok KW_signed } "signed" { tok KW_signed }
"static" { tok KW_static } "static" { tok KW_static }
"struct" { tok KW_struct } "struct" { tok KW_struct }
"supply0" { tok KW_supply0 } "supply0" { tok KW_supply0 }
"supply1" { tok KW_supply1 } "supply1" { tok KW_supply1 }
"task" { tok KW_task } "task" { tok KW_task }
"time" { tok KW_time } "time" { tok KW_time }
"tri" { tok KW_tri } "tri" { tok KW_tri }
"tri0" { tok KW_tri0 } "tri0" { tok KW_tri0 }
"tri1" { tok KW_tri1 } "tri1" { tok KW_tri1 }
"triand" { tok KW_triand } "triand" { tok KW_triand }
"trior" { tok KW_trior } "trior" { tok KW_trior }
"trireg" { tok KW_trireg } "trireg" { tok KW_trireg }
"typedef" { tok KW_typedef } "typedef" { tok KW_typedef }
"unique" { tok KW_unique } "unique" { tok KW_unique }
"unsigned" { tok KW_unsigned } "unsigned" { tok KW_unsigned }
"uwire" { tok KW_uwire } "uwire" { tok KW_uwire }
"wand" { tok KW_wand } "wand" { tok KW_wand }
"while" { tok KW_while } "while" { tok KW_while }
"wire" { tok KW_wire } "wire" { tok KW_wire }
"wor" { tok KW_wor } "wor" { tok KW_wor }
"xnor" { tok KW_xnor } "xnor" { tok KW_xnor }
"xor" { tok KW_xor } "xor" { tok KW_xor }
@simpleIdentifier { tok Id_simple } @simpleIdentifier { tok Id_simple }
@escapedIdentifier { tok Id_escaped } @escapedIdentifier { tok Id_escaped }
@systemIdentifier { tok Id_system } @systemIdentifier { tok Id_system }
@number { tok Lit_number } @number { tok Lit_number }
@string { tok Lit_string } @string { tok Lit_string }
"(" { tok Sym_paren_l } "(" { tok Sym_paren_l }
")" { tok Sym_paren_r } ")" { tok Sym_paren_r }
"[" { tok Sym_brack_l } "[" { tok Sym_brack_l }
"]" { tok Sym_brack_r } "]" { tok Sym_brack_r }
"{" { tok Sym_brace_l } "{" { tok Sym_brace_l }
"}" { tok Sym_brace_r } "}" { tok Sym_brace_r }
"~" { tok Sym_tildy } "~" { tok Sym_tildy }
"!" { tok Sym_bang } "!" { tok Sym_bang }
"@" { tok Sym_at } "@" { tok Sym_at }
"#" { tok Sym_pound } "#" { tok Sym_pound }
"%" { tok Sym_percent } "%" { tok Sym_percent }
"^" { tok Sym_hat } "^" { tok Sym_hat }
"&" { tok Sym_amp } "&" { tok Sym_amp }
"|" { tok Sym_bar } "|" { tok Sym_bar }
"*" { tok Sym_aster } "*" { tok Sym_aster }
"." { tok Sym_dot } "." { tok Sym_dot }
"," { tok Sym_comma } "," { tok Sym_comma }
":" { tok Sym_colon } ":" { tok Sym_colon }
";" { tok Sym_semi } ";" { tok Sym_semi }
"=" { tok Sym_eq } "=" { tok Sym_eq }
"<" { tok Sym_lt } "<" { tok Sym_lt }
">" { tok Sym_gt } ">" { tok Sym_gt }
"+" { tok Sym_plus } "+" { tok Sym_plus }
"-" { tok Sym_dash } "-" { tok Sym_dash }
"?" { tok Sym_question } "?" { tok Sym_question }
"/" { tok Sym_slash } "/" { tok Sym_slash }
"$" { tok Sym_dollar } "$" { tok Sym_dollar }
"'" { tok Sym_s_quote } "'" { tok Sym_s_quote }
"~&" { tok Sym_tildy_amp } "~&" { tok Sym_tildy_amp }
"~|" { tok Sym_tildy_bar } "~|" { tok Sym_tildy_bar }
"~^" { tok Sym_tildy_hat } "~^" { tok Sym_tildy_hat }
"^~" { tok Sym_hat_tildy } "^~" { tok Sym_hat_tildy }
"==" { tok Sym_eq_eq } "==" { tok Sym_eq_eq }
"!=" { tok Sym_bang_eq } "!=" { tok Sym_bang_eq }
"&&" { tok Sym_amp_amp } "&&" { tok Sym_amp_amp }
"||" { tok Sym_bar_bar } "||" { tok Sym_bar_bar }
"**" { tok Sym_aster_aster } "**" { tok Sym_aster_aster }
"<=" { tok Sym_lt_eq } "<=" { tok Sym_lt_eq }
">=" { tok Sym_gt_eq } ">=" { tok Sym_gt_eq }
">>" { tok Sym_gt_gt } ">>" { tok Sym_gt_gt }
"<<" { tok Sym_lt_lt } "<<" { tok Sym_lt_lt }
"++" { tok Sym_plus_plus } "++" { tok Sym_plus_plus }
"--" { tok Sym_dash_dash } "--" { tok Sym_dash_dash }
"+=" { tok Sym_plus_eq } "+=" { tok Sym_plus_eq }
"-=" { tok Sym_dash_eq } "-=" { tok Sym_dash_eq }
"*=" { tok Sym_aster_eq } "*=" { tok Sym_aster_eq }
"/=" { tok Sym_slash_eq } "/=" { tok Sym_slash_eq }
"%=" { tok Sym_percent_eq } "%=" { tok Sym_percent_eq }
"&=" { tok Sym_amp_eq } "&=" { tok Sym_amp_eq }
"|=" { tok Sym_bar_eq } "|=" { tok Sym_bar_eq }
"^=" { tok Sym_hat_eq } "^=" { tok Sym_hat_eq }
"+:" { tok Sym_plus_colon } "+:" { tok Sym_plus_colon }
"-:" { tok Sym_dash_colon } "-:" { tok Sym_dash_colon }
"::" { tok Sym_colon_colon } "::" { tok Sym_colon_colon }
".*" { tok Sym_dot_aster } ".*" { tok Sym_dot_aster }
"->" { tok Sym_dash_gt } "->" { tok Sym_dash_gt }
":=" { tok Sym_colon_eq } ":=" { tok Sym_colon_eq }
":/" { tok Sym_colon_slash } ":/" { tok Sym_colon_slash }
"##" { tok Sym_pound_pound } "##" { tok Sym_pound_pound }
"[*" { tok Sym_brack_l_aster } "[*" { tok Sym_brack_l_aster }
"[=" { tok Sym_brack_l_eq } "[=" { tok Sym_brack_l_eq }
"=>" { tok Sym_eq_gt } "=>" { tok Sym_eq_gt }
"@*" { tok Sym_at_aster } "@*" { tok Sym_at_aster }
"(*" { tok Sym_paren_l_aster } "(*" { tok Sym_paren_l_aster }
"*)" { tok Sym_aster_paren_r } "*)" { tok Sym_aster_paren_r }
"*>" { tok Sym_aster_gt } "*>" { tok Sym_aster_gt }
"===" { tok Sym_eq_eq_eq } "===" { tok Sym_eq_eq_eq }
"!==" { tok Sym_bang_eq_eq } "!==" { tok Sym_bang_eq_eq }
"==?" { tok Sym_eq_eq_question } "==?" { tok Sym_eq_eq_question }
"!=?" { tok Sym_bang_eq_question } "!=?" { tok Sym_bang_eq_question }
">>>" { tok Sym_gt_gt_gt } ">>>" { tok Sym_gt_gt_gt }
"<<<" { tok Sym_lt_lt_lt } "<<<" { tok Sym_lt_lt_lt }
"<<=" { tok Sym_lt_lt_eq } "<<=" { tok Sym_lt_lt_eq }
">>=" { tok Sym_gt_gt_eq } ">>=" { tok Sym_gt_gt_eq }
"|->" { tok Sym_bar_dash_gt } "|->" { tok Sym_bar_dash_gt }
"|=>" { tok Sym_bar_eq_gt } "|=>" { tok Sym_bar_eq_gt }
"[->" { tok Sym_brack_l_dash_gt } "[->" { tok Sym_brack_l_dash_gt }
"@@(" { tok Sym_at_at_paren_l } "@@(" { tok Sym_at_at_paren_l }
"(*)" { tok Sym_paren_l_aster_paren_r } "(*)" { tok Sym_paren_l_aster_paren_r }
"->>" { tok Sym_dash_gt_gt } "->>" { tok Sym_dash_gt_gt }
"&&&" { tok Sym_amp_amp_amp } "&&&" { tok Sym_amp_amp_amp }
"<<<=" { tok Sym_lt_lt_lt_eq } "<<<=" { tok Sym_lt_lt_lt_eq }
">>>=" { tok Sym_gt_gt_gt_eq } ">>>=" { tok Sym_gt_gt_gt_eq }
@comment { tok Spe_Comment } @commentLine { removeUntil "\n" }
@commentBegin { tok Spe_CommentBegin } @commentBlock { removeUntil "*/" }
@commentEnd { tok Spe_CommentEnd } @directive { tok Spe_Directive }
@directive { tok Spe_Directive } @newline { tok Spe_Newline }
@newline { tok Spe_Newline }
@whitespace ;
@whitespace ;
. { tok Unknown }
. { tok Unknown }
{ {
tok :: TokenName -> AlexPosn -> String -> Token
tok t (AlexPn _ l c) s = Token t s $ Position "" l c type AlexUserState = [Token]
-- Starting value of the lexer's user state: no tokens have been
-- collected yet.
alexInitUserState :: AlexUserState
alexInitUserState = mempty
-- Lex an entire input string into its list of tokens.
--
-- The scanner is run to completion and the token list accumulated in the
-- user state is returned. A failure reported by the Alex monad is turned
-- into a call to `error` prefixed with "Lex Error: ".
alexScanTokens :: String -> [Token]
alexScanTokens input =
    either lexFailure id (runAlex input scanAll)
  where
    -- run the scanner, then read back the accumulated user state
    scanAll = alexMonadScan >> get
    lexFailure msg = error $ "Lex Error: " ++ msg
-- Read the current user state (the `alex_ust` field) without changing
-- the lexer state.
get :: Alex AlexUserState
get = Alex readUserState
  where
    readUserState st = Right (st, alex_ust st)
-- Read the current user state and return the result of applying the
-- given projection to it.
gets :: (AlexUserState -> a) -> Alex a
gets f = do
    ust <- get
    return (f ust)
-- Replace the user state (the `alex_ust` field) with the result of
-- applying the given function to it; all other lexer state is kept.
modify :: (AlexUserState -> AlexUserState) -> Alex ()
modify f = Alex $ \st ->
    let updated = f (alex_ust st)
    in Right (st { alex_ust = updated }, ())
-- End-of-input action: does nothing and yields unit. Tokens are collected
-- in the user state by `tok`, so there is no value to produce here.
-- NOTE(review): presumably invoked by the generated scanner when the input
-- is exhausted, as the Alex monad wrappers require -- confirm against the
-- Alex version in use.
alexEOF :: Alex ()
alexEOF = return ()
-- Signature shared by the rule actions in this file (`removeUntil`, `tok`):
-- the scanner input at the start of the match, an Int (which `tok` uses as
-- the length of the matched text), and a result in the Alex monad.
type Action = AlexInput -> Int -> Alex ()
-- Remove characters from the input until the pattern is reached, then
-- drop the pattern itself and resume scanning.
--
-- Used to skip comments: the rule matches the comment opener and this
-- action discards everything up to and including the terminator.
--
-- * When the pattern is "\n" (line comments), reaching the end of the
--   input also counts as finding the terminator.
-- * For any other pattern, reaching the end of the input without finding
--   it is reported through `alexError` rather than crashing on the empty
--   remaining input.
--
-- Positions are advanced with the wrapper's `alexMove`, so the absolute
-- offset, the column after a newline, and tab stops stay consistent with
-- the positions Alex computes for ordinary tokens.
removeUntil :: String -> Action
removeUntil pattern _ _ = loop
  where
    patternLen = length pattern
    wantNewline = pattern == "\n"
    loop = do
        (pos, _, _, str) <- alexGetInput
        let found = (null str && wantNewline)
                 || pattern == take patternLen str
        case str of
            _ | found -> do
                -- step over the terminator; the pattern is only a couple
                -- of characters long, so a lazy fold is harmless here
                let afterPos = foldl alexMove pos pattern
                alexSetInput (afterPos, ' ', [], drop patternLen str)
                alexMonadScan
            [] ->
                -- ran out of input before the terminator appeared
                alexError $
                    "Reached EOF while looking for: " ++ show pattern
            ch : rest -> do
                -- discard one character and keep looking
                alexSetInput (alexMove pos ch, ' ', [], rest)
                loop
-- Rule action that records a token of the given kind and keeps scanning.
--
-- The token text is the first `len` characters of the input at the start
-- of the match; its position uses the line and column of that input and
-- an empty file name. The token is appended to the end of the token list
-- held in the user state.
tok :: TokenName -> Action
tok tokId (AlexPn _ line col, _, _, input) len = do
    modify (++ [matched])
    alexMonadScan
  where
    matched = Token tokId (take len input) (Position "" line col)
} }
...@@ -159,11 +159,6 @@ pp (Token Spe_Directive str pos : tokens) = do ...@@ -159,11 +159,6 @@ pp (Token Spe_Directive str pos : tokens) = do
return $ replacement ++ tokens' return $ replacement ++ tokens'
pp (Token Spe_Newline _ _ : tokens) = pp tokens pp (Token Spe_Newline _ _ : tokens) = pp tokens
pp (Token Spe_Comment _ _ : tokens) = pp tokens
pp (Token Spe_CommentBegin _ _ : tokens) =
pp $ tail $ dropWhile (not . isEnd) tokens
where isEnd (Token t _ _ ) = t == Spe_CommentEnd
pp (token : tokens) = do pp (token : tokens) = do
condStack <- gets ppCondStack condStack <- gets ppCondStack
......
...@@ -334,9 +334,6 @@ data TokenName ...@@ -334,9 +334,6 @@ data TokenName
| Sym_amp_amp_amp | Sym_amp_amp_amp
| Sym_lt_lt_lt_eq | Sym_lt_lt_lt_eq
| Sym_gt_gt_gt_eq | Sym_gt_gt_gt_eq
| Spe_Comment
| Spe_CommentBegin
| Spe_CommentEnd
| Spe_Directive | Spe_Directive
| Spe_Newline | Spe_Newline
| Unknown | Unknown
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment