Source file src/html/template/context.go

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package template
     6  
     7  import (
     8  	"fmt"
     9  	"slices"
    10  	"text/template/parse"
    11  )
    12  
// context describes the state an HTML parser must be in when it reaches the
// portion of HTML produced by evaluating a particular template node.
//
// The zero value of type context is the start context for a template that
// produces an HTML fragment as defined at
// https://www.w3.org/TR/html5/syntax.html#the-end
// where the context element is null.
//
// Because of the jsBraceDepth slice, a plain struct copy shares that slice's
// backing array with the original and two contexts cannot be compared with
// ==; use clone to obtain a detached copy and eq to compare.
type context struct {
	state   state   // high-level HTML parser state
	delim   delim   // delimiter that will end the current HTML attribute
	urlPart urlPart // part of the URL being produced; selects the encoding strategy
	jsCtx   jsCtx   // whether a '/' starts a regexp literal or a division operator
	// jsBraceDepth contains the current depth, for each JS template literal
	// string interpolation expression, of braces we've seen. This is used to
	// determine if the next } will close a JS template literal string
	// interpolation expression or not.
	jsBraceDepth []int
	attr         attr       // current HTML attribute when inside one
	element      element    // HTML element when inside a start tag or special body
	n            parse.Node // for range break/continue
	err          *Error     // associated error, if any (presumably accompanies stateError — confirm)
}
    35  
    36  func (c context) String() string {
    37  	var err error
    38  	if c.err != nil {
    39  		err = c.err
    40  	}
    41  	return fmt.Sprintf("{%v %v %v %v %v %v %v %v}", c.state, c.delim, c.urlPart, c.jsCtx, c.jsBraceDepth, c.attr, c.element, err)
    42  }
    43  
    44  // eq reports whether two contexts are equal.
    45  func (c context) eq(d context) bool {
    46  	return c.state == d.state &&
    47  		c.delim == d.delim &&
    48  		c.urlPart == d.urlPart &&
    49  		c.jsCtx == d.jsCtx &&
    50  		slices.Equal(c.jsBraceDepth, d.jsBraceDepth) &&
    51  		c.attr == d.attr &&
    52  		c.element == d.element &&
    53  		c.err == d.err
    54  }
    55  
    56  // mangle produces an identifier that includes a suffix that distinguishes it
    57  // from template names mangled with different contexts.
    58  func (c context) mangle(templateName string) string {
    59  	// The mangled name for the default context is the input templateName.
    60  	if c.state == stateText {
    61  		return templateName
    62  	}
    63  	s := templateName + "$htmltemplate_" + c.state.String()
    64  	if c.delim != delimNone {
    65  		s += "_" + c.delim.String()
    66  	}
    67  	if c.urlPart != urlPartNone {
    68  		s += "_" + c.urlPart.String()
    69  	}
    70  	if c.jsCtx != jsCtxRegexp {
    71  		s += "_" + c.jsCtx.String()
    72  	}
    73  	if c.jsBraceDepth != nil {
    74  		s += fmt.Sprintf("_jsBraceDepth(%v)", c.jsBraceDepth)
    75  	}
    76  	if c.attr != attrNone {
    77  		s += "_" + c.attr.String()
    78  	}
    79  	if c.element != elementNone {
    80  		s += "_" + c.element.String()
    81  	}
    82  	return s
    83  }
    84  
    85  // clone returns a copy of c with the same field values.
    86  func (c context) clone() context {
    87  	clone := c
    88  	clone.jsBraceDepth = slices.Clone(c.jsBraceDepth)
    89  	return clone
    90  }
    91  
// state describes a high-level HTML parser state.
//
// It bounds the top of the element stack, and by extension the HTML insertion
// mode, but also contains state that does not correspond to anything in the
// HTML5 parsing algorithm because a single token production in the HTML
// grammar may contain embedded actions in a template. For instance, the quoted
// HTML attribute produced by
//
//	<div title="Hello {{.World}}">
//
// is a single token in HTML's grammar but in a template spans several nodes.
//
// The zero value is stateText.
type state uint8
   104  
//go:generate stringer -type state

// The state values are assigned consecutively by iota starting at zero, so
// the order of the declarations below determines each constant's numeric
// value. NOTE(review): the stringer directive above suggests a generated
// String method depends on these values — regenerate it after any change.
const (
	// stateText is parsed character data. An HTML parser is in
	// this state when its parse position is outside an HTML tag,
	// directive, comment, and special element body.
	stateText state = iota
	// stateTag occurs before an HTML attribute or the end of a tag.
	stateTag
	// stateAttrName occurs inside an attribute name.
	// It occurs between the ^'s in ` ^name^ = value`.
	stateAttrName
	// stateAfterName occurs after an attr name has ended but before any
	// equals sign. It occurs between the ^'s in ` name^ ^= value`.
	stateAfterName
	// stateBeforeValue occurs after the equals sign but before the value.
	// It occurs between the ^'s in ` name =^ ^value`.
	stateBeforeValue
	// stateHTMLCmt occurs inside an <!-- HTML comment -->.
	stateHTMLCmt
	// stateRCDATA occurs inside an RCDATA element (<textarea> or <title>)
	// as described at https://www.w3.org/TR/html5/syntax.html#elements-0
	stateRCDATA
	// stateAttr occurs inside an HTML attribute whose content is text.
	stateAttr
	// stateURL occurs inside an HTML attribute whose content is a URL.
	stateURL
	// stateSrcset occurs inside an HTML srcset attribute.
	stateSrcset
	// stateJS occurs inside an event handler or script element.
	stateJS
	// stateJSDqStr occurs inside a JavaScript double quoted string.
	stateJSDqStr
	// stateJSSqStr occurs inside a JavaScript single quoted string.
	stateJSSqStr
	// stateJSTmplLit occurs inside a JavaScript back quoted string.
	stateJSTmplLit
	// stateJSRegexp occurs inside a JavaScript regexp literal.
	stateJSRegexp
	// stateJSBlockCmt occurs inside a JavaScript /* block comment */.
	stateJSBlockCmt
	// stateJSLineCmt occurs inside a JavaScript // line comment.
	stateJSLineCmt
	// stateJSHTMLOpenCmt occurs inside a JavaScript <!-- HTML-like comment.
	stateJSHTMLOpenCmt
	// stateJSHTMLCloseCmt occurs inside a JavaScript --> HTML-like comment.
	stateJSHTMLCloseCmt
	// stateCSS occurs inside a <style> element or style attribute.
	stateCSS
	// stateCSSDqStr occurs inside a CSS double quoted string.
	stateCSSDqStr
	// stateCSSSqStr occurs inside a CSS single quoted string.
	stateCSSSqStr
	// stateCSSDqURL occurs inside a CSS double quoted url("...").
	stateCSSDqURL
	// stateCSSSqURL occurs inside a CSS single quoted url('...').
	stateCSSSqURL
	// stateCSSURL occurs inside a CSS unquoted url(...).
	stateCSSURL
	// stateCSSBlockCmt occurs inside a CSS /* block comment */.
	stateCSSBlockCmt
	// stateCSSLineCmt occurs inside a CSS // line comment.
	stateCSSLineCmt
	// stateError is an infectious error state outside any valid
	// HTML/CSS/JS construct.
	stateError
	// stateMetaContent occurs inside an HTML meta element content attribute.
	stateMetaContent
	// stateMetaContentURL occurs inside a "url=" tag in an HTML meta element
	// content attribute.
	stateMetaContentURL
	// stateDead marks unreachable code after a {{break}} or {{continue}}.
	stateDead
)
   178  
   179  // isComment is true for any state that contains content meant for template
   180  // authors & maintainers, not for end-users or machines.
   181  func isComment(s state) bool {
   182  	switch s {
   183  	case stateHTMLCmt, stateJSBlockCmt, stateJSLineCmt, stateJSHTMLOpenCmt, stateJSHTMLCloseCmt, stateCSSBlockCmt, stateCSSLineCmt:
   184  		return true
   185  	}
   186  	return false
   187  }
   188  
   189  // isInTag return whether s occurs solely inside an HTML tag.
   190  func isInTag(s state) bool {
   191  	switch s {
   192  	case stateTag, stateAttrName, stateAfterName, stateBeforeValue, stateAttr:
   193  		return true
   194  	}
   195  	return false
   196  }
   197  
   198  // isInScriptLiteral returns true if s is one of the literal states within a
   199  // <script> tag, and as such occurrences of "<!--", "<script", and "</script"
   200  // need to be treated specially.
   201  func isInScriptLiteral(s state) bool {
   202  	// Ignore the comment states (stateJSBlockCmt, stateJSLineCmt,
   203  	// stateJSHTMLOpenCmt, stateJSHTMLCloseCmt) because their content is already
   204  	// omitted from the output.
   205  	switch s {
   206  	case stateJSDqStr, stateJSSqStr, stateJSTmplLit, stateJSRegexp:
   207  		return true
   208  	}
   209  	return false
   210  }
   211  
// delim is the delimiter that will end the current HTML attribute.
//
// The zero value is delimNone.
type delim uint8

//go:generate stringer -type delim

const (
	// delimNone occurs outside any attribute.
	delimNone delim = iota
	// delimDoubleQuote occurs when a double quote (") closes the attribute.
	delimDoubleQuote
	// delimSingleQuote occurs when a single quote (') closes the attribute.
	delimSingleQuote
	// delimSpaceOrTagEnd occurs when a space or right angle bracket (>)
	// closes the attribute.
	delimSpaceOrTagEnd
)
   228  
// urlPart identifies a part in an RFC 3986 hierarchical URL to allow different
// encoding strategies.
//
// The zero value is urlPartNone.
type urlPart uint8

//go:generate stringer -type urlPart

const (
	// urlPartNone occurs when not in a URL, or possibly at the start:
	// ^ in "^http://auth/path?k=v#frag".
	urlPartNone urlPart = iota
	// urlPartPreQuery occurs in the scheme, authority, or path; between the
	// ^s in "h^ttp://auth/path^?k=v#frag".
	urlPartPreQuery
	// urlPartQueryOrFrag occurs in the query or fragment portion, between
	// the ^s in "http://auth/path?^k=v#frag^".
	urlPartQueryOrFrag
	// urlPartUnknown occurs due to joining of contexts both before and
	// after the query separator.
	urlPartUnknown
)
   249  
// jsCtx determines whether a '/' starts a regular expression literal or a
// division operator.
//
// The zero value is jsCtxRegexp.
type jsCtx uint8

//go:generate stringer -type jsCtx

const (
	// jsCtxRegexp occurs where a '/' would start a regexp literal.
	jsCtxRegexp jsCtx = iota
	// jsCtxDivOp occurs where a '/' would start a division operator.
	jsCtxDivOp
	// jsCtxUnknown occurs where a '/' is ambiguous due to context joining.
	jsCtxUnknown
)
   264  
// element identifies the HTML element when inside a start tag or special body.
// Certain HTML elements (for example <script> and <style>) have bodies that
// are treated differently from stateText so the element type is necessary to
// transition into the correct context at the end of a tag and to identify the
// end delimiter for the body.
//
// The zero value is elementNone.
type element uint8

//go:generate stringer -type element

const (
	// elementNone occurs outside a special tag or special element body.
	elementNone element = iota
	// elementScript corresponds to the raw text <script> element
	// with JS MIME type or no type attribute.
	elementScript
	// elementStyle corresponds to the raw text <style> element.
	elementStyle
	// elementTextarea corresponds to the RCDATA <textarea> element.
	elementTextarea
	// elementTitle corresponds to the RCDATA <title> element.
	elementTitle
	// elementMeta corresponds to the HTML <meta> element.
	elementMeta
)
   289  
//go:generate stringer -type attr

// attr identifies the current HTML attribute when inside the attribute,
// that is, starting from stateAttrName until stateTag/stateText (exclusive).
//
// The zero value is attrNone.
type attr uint8

const (
	// attrNone corresponds to a normal attribute or no attribute.
	attrNone attr = iota
	// attrScript corresponds to an event handler attribute.
	attrScript
	// attrScriptType corresponds to the type attribute in script HTML element.
	attrScriptType
	// attrStyle corresponds to the style attribute whose value is CSS.
	attrStyle
	// attrURL corresponds to an attribute whose value is a URL.
	attrURL
	// attrSrcset corresponds to a srcset attribute.
	attrSrcset
	// attrMetaContent corresponds to the content attribute in meta HTML element.
	attrMetaContent
)
   312  

View as plain text