Black Lives Matter. Support the Equal Justice Initiative.

Source file src/mime/mediatype.go

Documentation: mime

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package mime
     6  
     7  import (
     8  	"errors"
     9  	"fmt"
    10  	"sort"
    11  	"strings"
    12  	"unicode"
    13  )
    14  
    15  // FormatMediaType serializes mediatype t and the parameters
    16  // param as a media type conforming to RFC 2045 and RFC 2616.
    17  // The type and parameter names are written in lower-case.
    18  // When any of the arguments result in a standard violation then
    19  // FormatMediaType returns the empty string.
    20  func FormatMediaType(t string, param map[string]string) string {
    21  	var b strings.Builder
    22  	if slash := strings.IndexByte(t, '/'); slash == -1 {
    23  		if !isToken(t) {
    24  			return ""
    25  		}
    26  		b.WriteString(strings.ToLower(t))
    27  	} else {
    28  		major, sub := t[:slash], t[slash+1:]
    29  		if !isToken(major) || !isToken(sub) {
    30  			return ""
    31  		}
    32  		b.WriteString(strings.ToLower(major))
    33  		b.WriteByte('/')
    34  		b.WriteString(strings.ToLower(sub))
    35  	}
    36  
    37  	attrs := make([]string, 0, len(param))
    38  	for a := range param {
    39  		attrs = append(attrs, a)
    40  	}
    41  	sort.Strings(attrs)
    42  
    43  	for _, attribute := range attrs {
    44  		value := param[attribute]
    45  		b.WriteByte(';')
    46  		b.WriteByte(' ')
    47  		if !isToken(attribute) {
    48  			return ""
    49  		}
    50  		b.WriteString(strings.ToLower(attribute))
    51  
    52  		needEnc := needsEncoding(value)
    53  		if needEnc {
    54  			// RFC 2231 section 4
    55  			b.WriteByte('*')
    56  		}
    57  		b.WriteByte('=')
    58  
    59  		if needEnc {
    60  			b.WriteString("utf-8''")
    61  
    62  			offset := 0
    63  			for index := 0; index < len(value); index++ {
    64  				ch := value[index]
    65  				// {RFC 2231 section 7}
    66  				// attribute-char := <any (US-ASCII) CHAR except SPACE, CTLs, "*", "'", "%", or tspecials>
    67  				if ch <= ' ' || ch >= 0x7F ||
    68  					ch == '*' || ch == '\'' || ch == '%' ||
    69  					isTSpecial(rune(ch)) {
    70  
    71  					b.WriteString(value[offset:index])
    72  					offset = index + 1
    73  
    74  					b.WriteByte('%')
    75  					b.WriteByte(upperhex[ch>>4])
    76  					b.WriteByte(upperhex[ch&0x0F])
    77  				}
    78  			}
    79  			b.WriteString(value[offset:])
    80  			continue
    81  		}
    82  
    83  		if isToken(value) {
    84  			b.WriteString(value)
    85  			continue
    86  		}
    87  
    88  		b.WriteByte('"')
    89  		offset := 0
    90  		for index := 0; index < len(value); index++ {
    91  			character := value[index]
    92  			if character == '"' || character == '\\' {
    93  				b.WriteString(value[offset:index])
    94  				offset = index
    95  				b.WriteByte('\\')
    96  			}
    97  		}
    98  		b.WriteString(value[offset:])
    99  		b.WriteByte('"')
   100  	}
   101  	return b.String()
   102  }
   103  
   104  func checkMediaTypeDisposition(s string) error {
   105  	typ, rest := consumeToken(s)
   106  	if typ == "" {
   107  		return errors.New("mime: no media type")
   108  	}
   109  	if rest == "" {
   110  		return nil
   111  	}
   112  	if !strings.HasPrefix(rest, "/") {
   113  		return errors.New("mime: expected slash after first token")
   114  	}
   115  	subtype, rest := consumeToken(rest[1:])
   116  	if subtype == "" {
   117  		return errors.New("mime: expected token after slash")
   118  	}
   119  	if rest != "" {
   120  		return errors.New("mime: unexpected content after media subtype")
   121  	}
   122  	return nil
   123  }
   124  
   125  // ErrInvalidMediaParameter is returned by ParseMediaType if
   126  // the media type value was found but there was an error parsing
   127  // the optional parameters
   128  var ErrInvalidMediaParameter = errors.New("mime: invalid media parameter")
   129  
   130  // ParseMediaType parses a media type value and any optional
   131  // parameters, per RFC 1521.  Media types are the values in
   132  // Content-Type and Content-Disposition headers (RFC 2183).
   133  // On success, ParseMediaType returns the media type converted
   134  // to lowercase and trimmed of white space and a non-nil map.
   135  // If there is an error parsing the optional parameter,
   136  // the media type will be returned along with the error
   137  // ErrInvalidMediaParameter.
   138  // The returned map, params, maps from the lowercase
   139  // attribute to the attribute value with its case preserved.
   140  func ParseMediaType(v string) (mediatype string, params map[string]string, err error) {
   141  	i := strings.Index(v, ";")
   142  	if i == -1 {
   143  		i = len(v)
   144  	}
   145  	mediatype = strings.TrimSpace(strings.ToLower(v[0:i]))
   146  
   147  	err = checkMediaTypeDisposition(mediatype)
   148  	if err != nil {
   149  		return "", nil, err
   150  	}
   151  
   152  	params = make(map[string]string)
   153  
   154  	// Map of base parameter name -> parameter name -> value
   155  	// for parameters containing a '*' character.
   156  	// Lazily initialized.
   157  	var continuation map[string]map[string]string
   158  
   159  	v = v[i:]
   160  	for len(v) > 0 {
   161  		v = strings.TrimLeftFunc(v, unicode.IsSpace)
   162  		if len(v) == 0 {
   163  			break
   164  		}
   165  		key, value, rest := consumeMediaParam(v)
   166  		if key == "" {
   167  			if strings.TrimSpace(rest) == ";" {
   168  				// Ignore trailing semicolons.
   169  				// Not an error.
   170  				return
   171  			}
   172  			// Parse error.
   173  			return mediatype, nil, ErrInvalidMediaParameter
   174  		}
   175  
   176  		pmap := params
   177  		if idx := strings.Index(key, "*"); idx != -1 {
   178  			baseName := key[:idx]
   179  			if continuation == nil {
   180  				continuation = make(map[string]map[string]string)
   181  			}
   182  			var ok bool
   183  			if pmap, ok = continuation[baseName]; !ok {
   184  				continuation[baseName] = make(map[string]string)
   185  				pmap = continuation[baseName]
   186  			}
   187  		}
   188  		if _, exists := pmap[key]; exists {
   189  			// Duplicate parameter name is bogus.
   190  			return "", nil, errors.New("mime: duplicate parameter name")
   191  		}
   192  		pmap[key] = value
   193  		v = rest
   194  	}
   195  
   196  	// Stitch together any continuations or things with stars
   197  	// (i.e. RFC 2231 things with stars: "foo*0" or "foo*")
   198  	var buf strings.Builder
   199  	for key, pieceMap := range continuation {
   200  		singlePartKey := key + "*"
   201  		if v, ok := pieceMap[singlePartKey]; ok {
   202  			if decv, ok := decode2231Enc(v); ok {
   203  				params[key] = decv
   204  			}
   205  			continue
   206  		}
   207  
   208  		buf.Reset()
   209  		valid := false
   210  		for n := 0; ; n++ {
   211  			simplePart := fmt.Sprintf("%s*%d", key, n)
   212  			if v, ok := pieceMap[simplePart]; ok {
   213  				valid = true
   214  				buf.WriteString(v)
   215  				continue
   216  			}
   217  			encodedPart := simplePart + "*"
   218  			v, ok := pieceMap[encodedPart]
   219  			if !ok {
   220  				break
   221  			}
   222  			valid = true
   223  			if n == 0 {
   224  				if decv, ok := decode2231Enc(v); ok {
   225  					buf.WriteString(decv)
   226  				}
   227  			} else {
   228  				decv, _ := percentHexUnescape(v)
   229  				buf.WriteString(decv)
   230  			}
   231  		}
   232  		if valid {
   233  			params[key] = buf.String()
   234  		}
   235  	}
   236  
   237  	return
   238  }
   239  
   240  func decode2231Enc(v string) (string, bool) {
   241  	sv := strings.SplitN(v, "'", 3)
   242  	if len(sv) != 3 {
   243  		return "", false
   244  	}
   245  	// TODO: ignoring lang in sv[1] for now. If anybody needs it we'll
   246  	// need to decide how to expose it in the API. But I'm not sure
   247  	// anybody uses it in practice.
   248  	charset := strings.ToLower(sv[0])
   249  	if len(charset) == 0 {
   250  		return "", false
   251  	}
   252  	if charset != "us-ascii" && charset != "utf-8" {
   253  		// TODO: unsupported encoding
   254  		return "", false
   255  	}
   256  	encv, err := percentHexUnescape(sv[2])
   257  	if err != nil {
   258  		return "", false
   259  	}
   260  	return encv, true
   261  }
   262  
   263  func isNotTokenChar(r rune) bool {
   264  	return !isTokenChar(r)
   265  }
   266  
   267  // consumeToken consumes a token from the beginning of provided
   268  // string, per RFC 2045 section 5.1 (referenced from 2183), and return
   269  // the token consumed and the rest of the string. Returns ("", v) on
   270  // failure to consume at least one character.
   271  func consumeToken(v string) (token, rest string) {
   272  	notPos := strings.IndexFunc(v, isNotTokenChar)
   273  	if notPos == -1 {
   274  		return v, ""
   275  	}
   276  	if notPos == 0 {
   277  		return "", v
   278  	}
   279  	return v[0:notPos], v[notPos:]
   280  }
   281  
   282  // consumeValue consumes a "value" per RFC 2045, where a value is
   283  // either a 'token' or a 'quoted-string'.  On success, consumeValue
   284  // returns the value consumed (and de-quoted/escaped, if a
   285  // quoted-string) and the rest of the string. On failure, returns
   286  // ("", v).
   287  func consumeValue(v string) (value, rest string) {
   288  	if v == "" {
   289  		return
   290  	}
   291  	if v[0] != '"' {
   292  		return consumeToken(v)
   293  	}
   294  
   295  	// parse a quoted-string
   296  	buffer := new(strings.Builder)
   297  	for i := 1; i < len(v); i++ {
   298  		r := v[i]
   299  		if r == '"' {
   300  			return buffer.String(), v[i+1:]
   301  		}
   302  		// When MSIE sends a full file path (in "intranet mode"), it does not
   303  		// escape backslashes: "C:\dev\go\foo.txt", not "C:\\dev\\go\\foo.txt".
   304  		//
   305  		// No known MIME generators emit unnecessary backslash escapes
   306  		// for simple token characters like numbers and letters.
   307  		//
   308  		// If we see an unnecessary backslash escape, assume it is from MSIE
   309  		// and intended as a literal backslash. This makes Go servers deal better
   310  		// with MSIE without affecting the way they handle conforming MIME
   311  		// generators.
   312  		if r == '\\' && i+1 < len(v) && isTSpecial(rune(v[i+1])) {
   313  			buffer.WriteByte(v[i+1])
   314  			i++
   315  			continue
   316  		}
   317  		if r == '\r' || r == '\n' {
   318  			return "", v
   319  		}
   320  		buffer.WriteByte(v[i])
   321  	}
   322  	// Did not find end quote.
   323  	return "", v
   324  }
   325  
   326  func consumeMediaParam(v string) (param, value, rest string) {
   327  	rest = strings.TrimLeftFunc(v, unicode.IsSpace)
   328  	if !strings.HasPrefix(rest, ";") {
   329  		return "", "", v
   330  	}
   331  
   332  	rest = rest[1:] // consume semicolon
   333  	rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
   334  	param, rest = consumeToken(rest)
   335  	param = strings.ToLower(param)
   336  	if param == "" {
   337  		return "", "", v
   338  	}
   339  
   340  	rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
   341  	if !strings.HasPrefix(rest, "=") {
   342  		return "", "", v
   343  	}
   344  	rest = rest[1:] // consume equals sign
   345  	rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
   346  	value, rest2 := consumeValue(rest)
   347  	if value == "" && rest2 == rest {
   348  		return "", "", v
   349  	}
   350  	rest = rest2
   351  	return param, value, rest
   352  }
   353  
   354  func percentHexUnescape(s string) (string, error) {
   355  	// Count %, check that they're well-formed.
   356  	percents := 0
   357  	for i := 0; i < len(s); {
   358  		if s[i] != '%' {
   359  			i++
   360  			continue
   361  		}
   362  		percents++
   363  		if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
   364  			s = s[i:]
   365  			if len(s) > 3 {
   366  				s = s[0:3]
   367  			}
   368  			return "", fmt.Errorf("mime: bogus characters after %%: %q", s)
   369  		}
   370  		i += 3
   371  	}
   372  	if percents == 0 {
   373  		return s, nil
   374  	}
   375  
   376  	t := make([]byte, len(s)-2*percents)
   377  	j := 0
   378  	for i := 0; i < len(s); {
   379  		switch s[i] {
   380  		case '%':
   381  			t[j] = unhex(s[i+1])<<4 | unhex(s[i+2])
   382  			j++
   383  			i += 3
   384  		default:
   385  			t[j] = s[i]
   386  			j++
   387  			i++
   388  		}
   389  	}
   390  	return string(t), nil
   391  }
   392  
   393  func ishex(c byte) bool {
   394  	switch {
   395  	case '0' <= c && c <= '9':
   396  		return true
   397  	case 'a' <= c && c <= 'f':
   398  		return true
   399  	case 'A' <= c && c <= 'F':
   400  		return true
   401  	}
   402  	return false
   403  }
   404  
   405  func unhex(c byte) byte {
   406  	switch {
   407  	case '0' <= c && c <= '9':
   408  		return c - '0'
   409  	case 'a' <= c && c <= 'f':
   410  		return c - 'a' + 10
   411  	case 'A' <= c && c <= 'F':
   412  		return c - 'A' + 10
   413  	}
   414  	return 0
   415  }
   416  

View as plain text