Last active
October 29, 2019 03:40
-
-
Save stephen-soltesz/fd52e5633563e9f652f83606cfdeb526 to your computer and use it in GitHub Desktop.
reflect / schema
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
git clone https://gist.github.com/stephen-soltesz/fd52e5633563e9f652f83606cfdeb526 reflect-schema | |
cd reflect-schema/ | |
go get . |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ParseInfo: | |
Description: The thing | |
TaskFileName: | |
Description: archive filename containing test_id. | |
ParseTime: | |
Description: time the parser parsed. | |
ParserVersion: | |
Description: source version of the ETL parser. | |
test_id: | |
Description: filename of measurement. | |
log_time: | |
Description: time of the test measurement |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
"io/ioutil" | |
"reflect" | |
"github.com/kr/pretty" | |
"cloud.google.com/go/bigquery" | |
"github.com/m-lab/etl/schema" | |
"github.com/m-lab/go/rtx" | |
yaml "gopkg.in/yaml.v2" | |
) | |
// SchemaDoc documents fields discovered during InferSchema.
//
// The outer key is a field's documentation name (see getDocName); the inner
// map holds that field's attributes. The only attribute read by this file is
// "Description".
type SchemaDoc map[string]map[string]string
// NewSchemaDoc returns a new schema doc initialized with the field | |
// descriptions in the given file. Failures are fatal. | |
func NewSchemaDoc(file string) *SchemaDoc { | |
docs, err := ioutil.ReadFile(file) | |
rtx.Must(err, "Failed to read list") | |
sd := &SchemaDoc{} | |
err = yaml.Unmarshal([]byte(docs), sd) | |
rtx.Must(err, "Failed to unmarshal %q", file) | |
return sd | |
} | |
// InferSchema generates a schema from the given value. Invalid fields are ignored. | |
func (m SchemaDoc) InferSchema(val interface{}) bigquery.Schema { | |
t := reflect.TypeOf(val) | |
s := m.inferSchema("", "", t) | |
return s.Schema | |
} | |
func newFieldSchema( | |
name, doc string, repeated bool, fieldType bigquery.FieldType) *bigquery.FieldSchema { | |
return &bigquery.FieldSchema{ | |
Name: name, | |
Description: doc, | |
Repeated: repeated, | |
Required: false, | |
Type: fieldType, | |
} | |
} | |
// getFieldName returns the BigQuery column name for struct field f.
//
// The name portion of the `bigquery` struct tag (the text before the first
// comma) takes precedence over the Go field name. Anything after the comma,
// such as "nullable", is an option and is not part of the name. When the name
// portion is empty the Go field name is used. A tag of exactly "-" marks the
// field as excluded, which is reported by returning the empty string.
func getFieldName(f reflect.StructField) string {
	tag, ok := f.Tag.Lookup("bigquery")
	if !ok {
		return f.Name
	}
	if tag == "-" {
		return ""
	}
	// Strip tag options so that "col,nullable" yields "col".
	for i := 0; i < len(tag); i++ {
		if tag[i] == ',' {
			tag = tag[:i]
			break
		}
	}
	if tag == "" {
		return f.Name
	}
	return tag
}
// Check for bqx name then fallback to fieldName. | |
func getDocName(f reflect.StructField) string { | |
name := getFieldName(f) | |
v, ok := f.Tag.Lookup("bqx") | |
if ok { | |
name = v | |
} | |
return name | |
} | |
func (m SchemaDoc) inferSchema(fieldName, docName string, t reflect.Type) *bigquery.FieldSchema { | |
fmt.Println("docname: ", docName) | |
doc := m[docName] | |
/* | |
TODO: BytesFieldType FieldType = "BYTES" | |
TODO: DateFieldType FieldType = "DATE" | |
TODO: TimeFieldType FieldType = "TIME" | |
TODO: DateTimeFieldType FieldType = "DATETIME" | |
*/ | |
switch t.Kind() { | |
case reflect.Struct: | |
// Special handling of time.Time types. | |
if t.String() == "time.Time" { | |
record := newFieldSchema(fieldName, doc["Description"], false, bigquery.TimestampFieldType) | |
return record | |
} | |
// At this point, treat field like a regular struct record. | |
record := newFieldSchema(fieldName, doc["Description"], false, bigquery.RecordFieldType) | |
schema := []*bigquery.FieldSchema{} | |
for i := 0; i < t.NumField(); i++ { | |
f := t.Field(i) | |
name := getFieldName(f) | |
if name == "" { | |
continue | |
} | |
l := m.inferSchema(name, getDocName(f), f.Type) | |
if l != nil { | |
schema = append(schema, l) | |
} | |
} | |
record.Schema = schema | |
return record | |
case reflect.Slice: | |
record := newFieldSchema(fieldName, doc["Description"], true, bigquery.RecordFieldType) | |
// Lookup type of slice to perform reflection on that type. | |
t := t.Elem() | |
s := m.inferSchema(fieldName, docName, t) | |
// The slice type was for a primitive type, with no sub-schema. | |
if s.Schema == nil { | |
s.Repeated = true | |
return s | |
} | |
// The slice type was for a structure type. Copy the sub-schema. | |
record.Schema = s.Schema | |
return record | |
case reflect.String: | |
return newFieldSchema(fieldName, doc["Description"], false, bigquery.StringFieldType) | |
case reflect.Bool: | |
return newFieldSchema(fieldName, doc["Description"], false, bigquery.BooleanFieldType) | |
case reflect.Int, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint, reflect.Uint16, reflect.Uint32, reflect.Uint64: | |
// TODO: check whether a type override is present in doc["Type"], e.g. Timestamp for Int. | |
return newFieldSchema(fieldName, doc["Description"], false, bigquery.IntegerFieldType) | |
case reflect.Float32, reflect.Float64: | |
return newFieldSchema(fieldName, doc["Description"], false, bigquery.FloatFieldType) | |
case reflect.Ptr: | |
t = t.Elem() | |
return m.inferSchema(fieldName, docName, t) | |
case reflect.Array: | |
// TODO: support reflect.Array | |
// TODO: identify special handling for BYTES field types? | |
panic("Array is not a supported field type") | |
case reflect.Map: | |
// We cannot support Maps without a separate impelementation of ValueSaver. | |
panic("Map is not a supported field type") | |
} | |
// Unknown type. | |
return nil | |
} | |
func main() { | |
sd := NewSchemaDoc("docs.txt") | |
pretty.Print(sd) | |
// Fields are documented with docs.txt. | |
n := schema.NDTResult{} | |
s := sd.InferSchema(n) | |
pretty.Print(s) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment