Created
April 8, 2019 00:06
-
-
Save coyove/75e28e2175afb187f8c99fb73a8a056f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Copyright 2015-2019 Brett Vickers. | |
// Use of this source code is governed by a BSD-style | |
// license that can be found in the LICENSE file. | |
// Package xmlwrap provides XML services through an Element Tree
// abstraction.
package xmlwrap | |
import ( | |
"bytes" | |
"encoding/xml" | |
"errors" | |
"io" | |
"reflect" | |
"sort" | |
"strings" | |
) | |
// ErrXML is returned when XML parsing fails due to incorrect formatting.
// In this file it is returned by readFrom when a token is decoded after the
// stack of open elements has already been emptied.
var ErrXML = errors.New("etree: invalid XML format")
// A Document is a container holding a complete XML tree.
//
// A document has a single embedded element, which contains zero or more child
// elements, one of which is usually the root element. Character data found
// outside of any element is accumulated in the embedded element's CharData.
// The embedded element itself does not correspond to any tag in the input; it
// only acts as the container for what was read.
type Document struct {
	Element
}
// Element is one node of the parsed XML tree: a tag name, the character data
// found directly inside it, its attributes, and its child elements.
type Element struct {
	// Name is the tag name as reported by the XML decoder.
	Name xml.Name
	// CharData accumulates the character data found directly inside the element.
	CharData string
	// attrs holds the attributes as {key, value} pairs.
	attrs [][2]string
	// Children lists the nested elements in the order they were added.
	Children []*Element
	// Line and Col hold the position computed when the start tag was read.
	Line int
	Col  int
}
func (e *Element) addAttr(k, v string, check bool) { | |
if check { | |
for i, a := range e.attrs { | |
if a[0] == k { | |
e.attrs[i] = [2]string{k, v} | |
return | |
} | |
} | |
} | |
e.attrs = append(e.attrs, [2]string{k, v}) | |
} | |
func (e *Element) Attr(k string) (string, bool) { | |
for _, a := range e.attrs { | |
if a[0] == k { | |
return a[1], true | |
} | |
} | |
return "", false | |
} | |
// NewDocument creates an XML document without a root element. | |
func NewDocument() *Document { | |
return &Document{ | |
Element: Element{Children: make([]*Element, 0)}, | |
} | |
} | |
// ReadFrom reads XML from the reader 'r' into this document. The function | |
// returns the number of bytes read and any error encountered. | |
func (d *Document) ReadFrom(r io.Reader) (n int64, err error) { | |
return d.Element.readFrom(r) | |
} | |
// ReadFromString reads XML from the string 's' into this document. | |
func (d *Document) ReadFromString(s string) error { | |
_, err := d.ReadFrom(strings.NewReader(s)) | |
return err | |
} | |
type stack struct { | |
data []*Element | |
} | |
func (s *stack) empty() bool { | |
return len(s.data) == 0 | |
} | |
func (s *stack) push(value *Element) { | |
s.data = append(s.data, value) | |
} | |
func (s *stack) pop() *Element { | |
value := s.data[len(s.data)-1] | |
s.data[len(s.data)-1] = nil | |
s.data = s.data[:len(s.data)-1] | |
return value | |
} | |
func (s *stack) peek() *Element { | |
return s.data[len(s.data)-1] | |
} | |
// lineReader wraps an io.Reader and records the absolute byte offset of every
// newline that passes through it, so that a byte offset can later be mapped to
// a line and column with Get.
type lineReader struct {
	r     io.Reader // underlying source
	read  int       // total number of bytes returned by Read so far
	lines []int     // absolute offsets of each '\n' seen, in increasing order
}

// Read reads from the underlying reader into p, records the offset of each
// newline found in the bytes just read, and returns the underlying reader's
// result unchanged.
func (lr *lineReader) Read(p []byte) (int, error) {
	n, err := lr.r.Read(p)
	for i := 0; i < n; i++ {
		if p[i] == '\n' {
			lr.lines = append(lr.lines, lr.read+i)
		}
	}
	lr.read += n
	return n, err
}

// Get converts a byte offset into a line number and a column.
//
// The line is 1 plus the number of recorded newlines located strictly before
// offset. On the first line the column is the offset itself; on later lines it
// is the distance from the preceding newline. When no newline has been
// recorded yet, the offset is treated as being on line 1 instead of indexing
// into the empty newline table.
func (lr *lineReader) Get(offset int) (line, col int) {
	// idx is the number of recorded newlines located strictly before offset.
	idx := sort.Search(len(lr.lines), func(i int) bool {
		return offset <= lr.lines[i]
	})
	if idx == 0 {
		// First line, or nothing recorded at all: there is no preceding
		// newline to measure from.
		return 1, offset
	}
	return idx + 1, offset - lr.lines[idx-1]
}
// ReadFrom reads XML from the reader ;ri' and stores the result as a new | |
// child of this element. | |
func (e *Element) readFrom(ri io.Reader) (n int64, err error) { | |
r := &lineReader{r: ri, lines: []int{}} | |
dec := xml.NewDecoder(r) | |
rdec := reflect.ValueOf(dec).Elem().FieldByName("offset") | |
var stack stack | |
stack.push(e) | |
for { | |
t, err := dec.RawToken() | |
switch { | |
case err == io.EOF: | |
return 0, nil | |
case err != nil: | |
return 0, err | |
case stack.empty(): | |
return 0, ErrXML | |
} | |
top := stack.peek() | |
switch t := t.(type) { | |
case xml.StartElement: | |
e := &Element{ | |
Name: t.Name, | |
attrs: make([][2]string, 0), | |
Children: make([]*Element, 0), | |
} | |
e.Line, e.Col = r.Get(int(rdec.Int())) | |
for _, a := range t.Attr { | |
e.addAttr(a.Name.Local, a.Value, false) | |
} | |
stack.push(e) | |
top.Children = append(top.Children, e) | |
case xml.EndElement: | |
stack.pop() | |
case xml.CharData: | |
top.CharData += string(t) | |
// var flags charDataFlags | |
//newCharData(data, flags, top) | |
// case xml.Comment: | |
// newComment(string(t), top) | |
// case xml.Directive: | |
// newDirective(string(t), top) | |
// case xml.ProcInst: | |
// newProcInst(t.Target, string(t.Inst), top) | |
} | |
} | |
} | |
func (e *Element) Text() string { | |
var p bytes.Buffer | |
p.WriteString(e.CharData) | |
// for _, c := range e.Children { | |
// p.WriteString(c.Text()) | |
// } | |
return p.String() | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment