Last active
April 20, 2023 20:59
-
-
Save MarkPflug/6df6c1dee306685edec9a3293a83a0c0 to your computer and use it in GitHub Desktop.
CSV Validation Example
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// C# 11 and .NET 6+ | |
using Sylvan.Data; // 0.2.12-B0001 | |
using Sylvan.Data.Csv; // 1.2.7 | |
// Schema for the CSV data below.
var schema =
    new Schema.Builder()
        // Id is a non-nullable int, so a value is required.
        .Add<int>("Id")
        .Add<string>("Name", allowNull: false)
        // Date of birth can be null.
        .Add<DateTime?>("DOB")
        // Nullable enum value.
        .Add<ConsoleColor?>("FavoriteColor")
        .Build();

// Record 1 is OK.
// Record 2 has a missing Id and an unknown color. The different date format is OK.
// Record 3 has a bad date (Unknown). The missing/null color is OK, as it is nullable.
var data =
    """
    Id,Name,DOB,FavoriteColor
    1,Dan,2020-01-01,Red
    ,Alex,"Jun 29, 1995",Blornge
    3,Maria,Unknown
    """;

// Create a reader with the schema.
// The reader is disposable, so a using declaration releases it (and the
// underlying text reader) when the program ends.
var opts = new CsvDataReaderOptions { Schema = new CsvSchema(schema) };
using var reader = CsvDataReader.Create(new StringReader(data), opts);

// Apply the validation handler. See the "HandleRecordError" function below.
var validatingReader = reader.ValidateSchema(HandleRecordError);

// This will produce records for row 1, which had no errors,
// and row 3, where the invalid date could be corrected.
foreach (var record in validatingReader.GetRecords<Record>())
{
    Console.WriteLine(record.ToString());
}
// Validation callback: writes every failure on the current row to the console.
// A failed "DOB" value is repaired by replacing it with null; a failure in any
// other column marks the record as irreparable.
// Returns true only when every error on the row was repaired, so that
// repairable records are handed back to the reader.
static bool HandleRecordError(SchemaValidationContext context)
{
    var csvReader = (CsvDataReader)context.DataReader;

    Console.WriteLine("--- ERROR START ---");
    Console.WriteLine($"Error(s) on row {csvReader.RowNumber}. Raw Record:");
    Console.Out.Write(csvReader.GetRawRecordSpan());
    Console.WriteLine("");

    bool allRepaired = true;
    foreach (var ordinal in context.GetErrors())
    {
        var error = context.GetException(ordinal);

        // Log the error detail for this column.
        Console.WriteLine($" col: {ordinal} value: \"{csvReader.GetString(ordinal)}\" exception: {error.Message} ({error.GetType()})");

        if (csvReader.GetName(ordinal) == "DOB")
        {
            // Unknown date: replace it with null.
            context.SetValue(ordinal, null);
        }
        else
        {
            // Any other issue is irreparable.
            allRepaired = false;
        }
    }

    string outcome = allRepaired ? "Record was repaired" : "Record was NOT repaired";
    Console.WriteLine(outcome);
    Console.WriteLine("--- ERROR END ---");
    return allRepaired;
}
// Target type for the rows read from the CSV data; one property per column.
class Record
{
    public int Id { get; set; }

    public string Name { get; set; }

    public DateTime? DOB { get; set; }

    public ConsoleColor? FavoriteColor { get; set; }

    // Formats the record on a single line, printing "NULL" when DOB has no value.
    public override string ToString()
    {
        string dobText = DOB.HasValue ? DOB.Value.ToString() : "NULL";
        return $"Record: {Id} {Name} {dobText} {FavoriteColor}";
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment