Skip to content

Instantly share code, notes, and snippets.

@RupertAvery
Last active December 11, 2021 00:42
Show Gist options
  • Save RupertAvery/ced40e72962390661bab0b93886250f9 to your computer and use it in GitHub Desktop.
Save RupertAvery/ced40e72962390661bab0b93886250f9 to your computer and use it in GitHub Desktop.
Trimmed INSERT Parser for ANTLR

This is a trimmed-down version of https://gist.github.com/RupertAvery/9d8bfd862ce46369ec356b25832305a9 that only parses an INSERT INTO statement.

This should compile much faster.

Remove TSqlParser.g4 from the source and replace the Program.cs with the one here.

Make sure you set the Build Action for TSqlInsertParser.g4 to "ANTLR 4 grammar" and the Custom Tool Namespace to **Parser**.

using Antlr4.Runtime;
using Antlr4.Runtime.Tree;
using System;
namespace TSQLParser
{
/// <summary>
/// Console demo: parses one hard-coded INSERT statement and prints the column
/// names and values collected by <c>SqlInsertParserListener</c>.
/// </summary>
class Program
{
    static void Main(string[] args)
    {
        Parse("INSERT INTO [Address] ([AddressLine1] ,[AddressLine2] ,[City] ,[StateProvinceID] ,[PostalCode] ,[ModifiedDate]) VALUES (N'My small, cozy house',N'Small town',N'Small country',1,NULL,'2021-06-30 12:45:30.000')");
    }

    /// <summary>
    /// Runs <paramref name="input"/> through the lexer and parser, walks the resulting
    /// tree with a <c>SqlInsertParserListener</c> and writes what it collected to the console.
    /// </summary>
    /// <param name="input">The SQL text to parse.</param>
    static void Parse(string input)
    {
        // NOTE(review): the second argument presumably selects upper-casing of the
        // characters the lexer sees - confirm against CaseChangingCharStream.
        var caseChangingStream = new CaseChangingCharStream(CharStreams.fromString(input), true);
        var tokenStream = new CommonTokenStream(new Parser.TSqlLexer(caseChangingStream));
        var sqlParser = new Parser.TSqlInsertParser(tokenStream) { BuildParseTree = true };

        var statement = sqlParser.insert_statement();
        var collector = new SqlInsertParserListener();
        ParseTreeWalker.Default.Walk(collector, statement);

        PrintSection("Column Names:", collector.ColumnNames);
        Console.WriteLine();
        PrintSection("Values:", collector.Values);
    }

    // Writes a heading followed by one line per item.
    static void PrintSection(string heading, System.Collections.Generic.IEnumerable<string> items)
    {
        Console.WriteLine(heading);
        foreach (var item in items)
        {
            Console.WriteLine(item);
        }
    }
}
}
using Antlr4.Runtime.Misc;
using Parser;
using System.Collections.Generic;
namespace TSQLParser
{
/// <summary>
/// Parse-tree listener that collects the column names and the values of an
/// <c>INSERT INTO ... VALUES (...)</c> statement while the tree is walked.
/// </summary>
/// <remarks>
/// Both lists are replaced each time an insert statement is entered, so the
/// properties always describe the most recently walked statement.
/// </remarks>
public class SqlInsertParserListener : TSqlInsertParserBaseListener
{
    // Allocated up front so the public properties are never null, even before a walk.
    private List<string> columnNames = new List<string>();
    private List<string> values = new List<string>();

    /// <summary>Column names in source order, with identifier delimiters removed.</summary>
    public IReadOnlyCollection<string> ColumnNames => columnNames;

    /// <summary>
    /// One entry per expression in the VALUES list, in source order. SQL <c>NULL</c> is
    /// stored as a null reference, string literals are stored unquoted, and every other
    /// expression (numbers, money, binary, DEFAULT, local variables) as its source text.
    /// </summary>
    public IReadOnlyCollection<string> Values => values;

    /// <summary>Starts a fresh pair of result lists for the statement being entered.</summary>
    public override void EnterInsert_statement([NotNull] TSqlInsertParser.Insert_statementContext context)
    {
        base.EnterInsert_statement(context);
        columnNames = new List<string>();
        values = new List<string>();
    }

    /// <summary>Records every column identifier of the column list.</summary>
    public override void EnterInsert_column_name_list([NotNull] TSqlInsertParser.Insert_column_name_listContext context)
    {
        base.EnterInsert_column_name_list(context);
        foreach (var column in context.insert_column_id())
        {
            columnNames.Add(ReadColumnName(column));
        }
    }

    /// <summary>Records one entry per expression of the VALUES list.</summary>
    public override void EnterInsert_statement_value([NotNull] TSqlInsertParser.Insert_statement_valueContext context)
    {
        base.EnterInsert_statement_value(context);
        var expList = context.expression_list();
        if (expList == null)
        {
            // The parser recovered from a syntax error and produced no expression list.
            return;
        }
        foreach (var expression in expList.expression())
        {
            // Always add exactly one entry so Values stays positionally aligned with ColumnNames.
            values.Add(ReadValue(expression));
        }
    }

    // Rebuilds a (possibly qualified) column id part by part, so that each identifier
    // is unquoted on its own: "[t].[c]" becomes "t.c" rather than "t].[c".
    private static string ReadColumnName(TSqlInsertParser.Insert_column_idContext column)
    {
        var pieces = new List<string>();
        for (var i = 0; i < column.ChildCount; i++)
        {
            var child = column.GetChild(i);
            var identifier = child as TSqlInsertParser.Id_Context;
            pieces.Add(identifier != null ? UnquoteIdentifier(identifier.GetText()) : child.GetText());
        }
        return string.Concat(pieces);
    }

    // Removes [..] or ".." delimiters from a single identifier; plain identifiers pass through.
    private static string UnquoteIdentifier(string text)
    {
        if (text.Length >= 2)
        {
            var first = text[0];
            var last = text[text.Length - 1];
            if (first == '[' && last == ']')
            {
                // Inside brackets a literal ']' is written as ']]'.
                return text.Substring(1, text.Length - 2).Replace("]]", "]");
            }
            if (first == '"' && last == '"')
            {
                return text.Substring(1, text.Length - 2);
            }
        }
        return text;
    }

    // Converts one VALUES expression to its collected form (see the Values property).
    private static string ReadValue(TSqlInsertParser.ExpressionContext expression)
    {
        var primitive = expression.primitive_expression();
        if (primitive == null)
        {
            // Error-recovered node: keep whatever text was matched.
            return expression.GetText();
        }
        if (primitive.NULL_() != null)
        {
            return null;
        }
        var constant = primitive.constant();
        if (constant != null && constant.STRING() != null)
        {
            return UnquoteString(constant.STRING().GetText());
        }
        // Numeric, money and binary constants, DEFAULT and local variables keep their source text.
        return primitive.GetText();
    }

    // Strips the optional N prefix and the surrounding quotes of a string literal
    // and collapses doubled quotes ('') into a single one.
    private static string UnquoteString(string text)
    {
        // Token text keeps the case typed by the user, so accept a lower-case prefix too.
        var start = (text.Length > 0 && (text[0] == 'N' || text[0] == 'n')) ? 1 : 0;
        if (text.Length - start < 2)
        {
            // Too short to carry both quotes; return it untouched rather than throw.
            return text;
        }
        return text.Substring(start + 1, text.Length - start - 2).Replace("''", "'");
    }
}
}
parser grammar TSqlInsertParser;
options { tokenVocab=TSqlLexer; }
// Entry rule:
//   INSERT INTO <table> [ '(' <column list> ')' ] VALUES '(' <expression list> ')' [ ';' ]
// The target table must be matched explicitly; without it an input such as
// "INSERT INTO [Address] (...)" is only accepted through error recovery.
insert_statement
    : INSERT INTO
      insert_table_name
      ('(' insert_column_name_list ')')?
      insert_statement_value
      ';'?
    ;

// Target table of the insert: an identifier with optional "<id>." qualifier prefixes.
// Kept separate from insert_column_id so listeners can tell the table from the columns.
insert_table_name
    : (id_? '.')* id_
    ;
// Comma-separated list of one or more column identifiers.
// Every matched insert_column_id is collected under the `col` label.
insert_column_name_list
: col+=insert_column_id (',' col+=insert_column_id)*
;
// A column identifier: zero or more "<id>." prefixes followed by the final identifier.
// The identifier before each dot is optional, so consecutive dots are accepted.
// Prefix identifiers are collected under the `ignore` label.
insert_column_id
: (ignore+=id_? '.' )* id_
;
// A single identifier token: ID, DOUBLE_QUOTE_ID or SQUARE_BRACKET_ID.
// The token types themselves come from the lexer vocabulary named in `options`.
id_
: ID
| DOUBLE_QUOTE_ID
| SQUARE_BRACKET_ID
;
// The VALUES clause: exactly one parenthesized expression list.
// The list is collected under the `exps` label.
insert_statement_value
: VALUES '(' exps+=expression_list ')'
;
// Comma-separated list of one or more expressions.
// Every matched expression is collected under the `exp` label.
expression_list
: exp+=expression (',' exp+=expression)*
;
// In this grammar an expression is only ever a primitive_expression;
// no operators, function calls or sub-queries are defined here.
expression
: primitive_expression
;
// The simplest value forms: the DEFAULT, NULL_ or LOCAL_ID tokens, or a constant.
// NOTE(review): LOCAL_ID is presumably an @variable reference - confirm in the lexer grammar.
primitive_expression
: DEFAULT | NULL_ | LOCAL_ID | constant
;
// Literal constants. Numeric alternatives accept an optional leading sign;
// the money alternative labels its '$' token as `dollar`.
constant
: STRING // string, datetime or uniqueidentifier
| BINARY
| sign? DECIMAL
| sign? (REAL | FLOAT) // float or decimal
| sign? dollar='$' (DECIMAL | FLOAT) // money
;
// Sign character ('+' or '-') that may precede a numeric or money constant.
sign
: '+'
| '-'
;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment