1 // Go support for Protocol Buffers - Google's data interchange format
3 // Copyright 2010 Google Inc. All rights reserved.
4 // http://code.google.com/p/goprotobuf/
6 // Redistribution and use in source and binary forms, with or without
7 // modification, are permitted provided that the following conditions are
10 // * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 // * Redistributions in binary form must reproduce the above
13 // copyright notice, this list of conditions and the following disclaimer
14 // in the documentation and/or other materials provided with the
16 // * Neither the name of Google Inc. nor the names of its
17 // contributors may be used to endorse or promote products derived from
18 // this software without specific prior written permission.
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 // Functions for parsing the Text protocol buffer format.
35 // TODO: message sets, extensions.
44 // ParseError satisfies the os.Error interface.
45 type ParseError struct {
47 Line int // 1-based line number
48 Offset int // 0-based byte offset from start of input
51 func (p *ParseError) String() string {
53 // show offset only for first line
54 return fmt.Sprintf("line 1.%d: %v", p.Offset, p.Message)
56 return fmt.Sprintf("line %d: %v", p.Line, p.Message)
62 line int // line number
63 offset int // byte number from start of input, not start of line
64 unquoted string // the unquoted version of value, if it was a quoted string
67 func (t *token) String() string {
69 return fmt.Sprintf("%q (line=%d, offset=%d)", t.value, t.line, t.offset)
71 return fmt.Sprintf("parse error: %v", t.err)
74 type textParser struct {
75 s string // remaining input
76 done bool // whether the parsing is finished (success or error)
77 backed bool // whether back() was called
82 func newTextParser(s string) *textParser {
90 func (p *textParser) errorf(format string, a ...interface{}) *ParseError {
91 pe := &ParseError{fmt.Sprintf(format, a...), p.cur.line, p.cur.offset}
97 // Numbers and identifiers are matched by [-+._A-Za-z0-9]
98 func isIdentOrNumberChar(c byte) bool {
100 case 'A' <= c && c <= 'Z', 'a' <= c && c <= 'z':
102 case '0' <= c && c <= '9':
106 case '-', '+', '.', '_':
112 func isWhitespace(c byte) bool {
114 case ' ', '\t', '\n', '\r':
120 func (p *textParser) skipWhitespace() {
122 for i < len(p.s) && (isWhitespace(p.s[i]) || p.s[i] == '#') {
124 // comment; skip to end of line or input
125 for i < len(p.s) && p.s[i] != '\n' {
138 p.s = p.s[i:len(p.s)]
144 func (p *textParser) advance() {
151 // Start of non-whitespace
153 p.cur.offset, p.cur.line = p.offset, p.line
156 case '<', '>', '{', '}', ':':
158 p.cur.value, p.s = p.s[0:1], p.s[1:len(p.s)]
162 for i < len(p.s) && p.s[i] != '"' && p.s[i] != '\n' {
163 if p.s[i] == '\\' && i+1 < len(p.s) {
169 if i >= len(p.s) || p.s[i] != '"' {
170 p.errorf("unmatched quote")
173 // TODO: Should be UnquoteC.
174 unq, err := strconv.Unquote(p.s[0 : i+1])
176 p.errorf("invalid quoted string %v", p.s[0:i+1])
179 p.cur.value, p.s = p.s[0:i+1], p.s[i+1:len(p.s)]
183 for i < len(p.s) && isIdentOrNumberChar(p.s[i]) {
187 p.errorf("unexpected byte %#x", p.s[0])
190 p.cur.value, p.s = p.s[0:i], p.s[i:len(p.s)]
192 p.offset += len(p.cur.value)
195 // Back off the parser by one token. Can only be done between calls to next().
196 // It makes the next advance() a no-op.
197 func (p *textParser) back() { p.backed = true }
199 // Advances the parser and returns the new current token.
200 func (p *textParser) next() *token {
201 if p.backed || p.done {
208 } else if len(p.cur.value) > 0 && p.cur.value[0] == '"' {
209 // Look for multiple quoted strings separated by whitespace,
210 // and concatenate them.
214 if p.done || p.s[0] != '"' {
218 if p.cur.err != nil {
221 cat.value += " " + p.cur.value
222 cat.unquoted += p.cur.unquoted
224 p.done = false // parser may have seen EOF, but we want to return cat
230 // Return an error indicating which required field was not set.
231 func (p *textParser) missingRequiredFieldError(sv reflect.Value) *ParseError {
233 sprops := GetProperties(st)
234 for i := 0; i < st.NumField(); i++ {
235 if !isNil(sv.Field(i)) {
239 props := sprops.Prop[i]
241 return p.errorf("message %v missing required field %q", st, props.OrigName)
244 return p.errorf("message %v missing required field", st) // should not happen
247 // Returns the index in the struct for the named field, as well as the parsed tag properties.
248 func structFieldByName(st reflect.Type, name string) (int, *Properties, bool) {
249 sprops := GetProperties(st)
250 i, ok := sprops.origNames[name]
252 return i, sprops.Prop[i], true
254 return -1, nil, false
257 func (p *textParser) readStruct(sv reflect.Value, terminator string) *ParseError {
259 reqCount := GetProperties(st).reqCount
260 // A struct is a sequence of "name: value", terminated by one of
261 // '>' or '}', or the end of the input.
267 if tok.value == terminator {
271 fi, props, ok := structFieldByName(st, tok.value)
273 return p.errorf("unknown field name %q in %v", tok.value, st)
276 // Check that it's not already set if it's not a repeated field.
277 if !props.Repeated && !isNil(sv.Field(fi)) {
278 return p.errorf("non-repeated field %q was repeated", tok.value)
285 if tok.value != ":" {
286 // Colon is optional when the field is a group or message.
292 // A "bytes" field is either a message, a string, or a repeated field;
293 // those three become *T, *string and []T respectively, so we can check for
294 // this field being a pointer to a non-string.
295 typ := st.Field(fi).Type
296 if typ.Kind() == reflect.Ptr {
298 if typ.Elem().Kind() == reflect.String {
301 } else if typ.Kind() == reflect.Slice {
303 if typ.Elem().Kind() != reflect.Ptr {
310 return p.errorf("expected ':', found %q", tok.value)
315 // Parse into the field.
316 if err := p.readAny(sv.Field(fi), props); err != nil {
326 return p.missingRequiredFieldError(sv)
334 maxUint32 = 1<<32 - 1
337 func (p *textParser) readAny(v reflect.Value, props *Properties) *ParseError {
343 return p.errorf("unexpected EOF")
346 switch fv := v; fv.Kind() {
349 if at.Elem().Kind() == reflect.Uint8 {
350 // Special case for []byte
351 if tok.value[0] != '"' {
352 // Deliberately written out here, as the error after
353 // this switch statement would write "invalid []byte: ...",
354 // which is not as user-friendly.
355 return p.errorf("invalid string: %v", tok.value)
357 bytes := []byte(tok.unquoted)
358 fv.Set(reflect.ValueOf(bytes))
361 // Repeated field. May already exist.
363 if flen == fv.Cap() {
364 nav := reflect.MakeSlice(at, flen, 2*flen+1)
365 reflect.Copy(nav, fv)
372 return p.readAny(fv.Index(flen), nil) // TODO: pass properties?
374 // Either "true", "false", 1 or 0.
383 case reflect.Float32, reflect.Float64:
384 if f, err := strconv.AtofN(tok.value, fv.Type().Bits()); err == nil {
389 if x, err := strconv.Atoi64(tok.value); err == nil && minInt32 <= x && x <= maxInt32 {
393 if len(props.Enum) == 0 {
396 m, ok := enumValueMaps[props.Enum]
400 x, ok := m[tok.value]
407 if x, err := strconv.Atoi64(tok.value); err == nil {
412 // A basic field (indirected through pointer), or a repeated message/group
414 fv.Set(reflect.New(fv.Type().Elem()))
415 return p.readAny(fv.Elem(), props)
417 if tok.value[0] == '"' {
418 fv.SetString(tok.unquoted)
422 var terminator string
429 return p.errorf("expected '{' or '<', found %q", tok.value)
431 return p.readStruct(fv, terminator)
433 if x, err := strconv.Atoui64(tok.value); err == nil && x <= maxUint32 {
434 fv.SetUint(uint64(x))
438 if x, err := strconv.Atoui64(tok.value); err == nil {
443 return p.errorf("invalid %v: %v", v.Type(), tok.value)
446 var notPtrStruct os.Error = &ParseError{"destination is not a pointer to a struct", 0, 0}
448 // UnmarshalText reads a protobuffer in Text format.
449 func UnmarshalText(s string, pb interface{}) os.Error {
450 v := reflect.ValueOf(pb)
451 if v.Kind() != reflect.Ptr || v.Elem().Kind() != reflect.Struct {
454 if pe := newTextParser(s).readStruct(v.Elem(), ""); pe != nil {