
This upgrades all of libgo other than the runtime package to the Go 1.4 release. In Go 1.4 much of the runtime was rewritten into Go. Merging that code will take more time and will not change the API, so I'm putting it off for now. There are a few runtime changes anyhow, to accommodate other packages that rely on minor modifications to the runtime support. The compiler changes slightly to add a one-bit flag to each type descriptor kind that is stored directly in an interface, which for gccgo is currently only pointer types. Another one-bit flag (gcprog) is reserved because it is used by the gc compiler, but gccgo does not currently use it. There is another error check in the compiler since I ran across it during testing. gotools/: * Makefile.am (go_cmd_go_files): Sort entries. Add generate.go. * Makefile.in: Rebuild. From-SVN: r219627
319 lines
7.6 KiB
Go
319 lines
7.6 KiB
Go
// Copyright 2011 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package syntax
|
|
|
|
// Note to implementers:
|
|
// In this package, re is always a *Regexp and r is always a rune.
|
|
|
|
import (
|
|
"bytes"
|
|
"strconv"
|
|
"strings"
|
|
"unicode"
|
|
)
|
|
|
|
// A Regexp is a node in a regular expression syntax tree.
|
|
type Regexp struct {
|
|
Op Op // operator
|
|
Flags Flags
|
|
Sub []*Regexp // subexpressions, if any
|
|
Sub0 [1]*Regexp // storage for short Sub
|
|
Rune []rune // matched runes, for OpLiteral, OpCharClass
|
|
Rune0 [2]rune // storage for short Rune
|
|
Min, Max int // min, max for OpRepeat
|
|
Cap int // capturing index, for OpCapture
|
|
Name string // capturing name, for OpCapture
|
|
}
|
|
|
|
// An Op is a single regular expression operator.
type Op uint8

// The operators below appear in precedence order, from tightest binding
// to weakest. Among the character class operators the order runs from
// simplest to most complex
// (OpLiteral, OpCharClass, OpAnyCharNotNL, OpAnyChar).

const (
	OpNoMatch        Op = iota + 1 // matches no strings
	OpEmptyMatch                   // matches empty string
	OpLiteral                      // matches Runes sequence
	OpCharClass                    // matches Runes interpreted as range pair list
	OpAnyCharNotNL                 // matches any character except newline
	OpAnyChar                      // matches any character
	OpBeginLine                    // matches empty string at beginning of line
	OpEndLine                      // matches empty string at end of line
	OpBeginText                    // matches empty string at beginning of text
	OpEndText                      // matches empty string at end of text
	OpWordBoundary                 // matches word boundary `\b`
	OpNoWordBoundary               // matches word non-boundary `\B`
	OpCapture                      // capturing subexpression with index Cap, optional name Name
	OpStar                         // matches Sub[0] zero or more times
	OpPlus                         // matches Sub[0] one or more times
	OpQuest                        // matches Sub[0] zero or one times
	OpRepeat                       // matches Sub[0] at least Min times, at most Max (Max == -1 is no limit)
	OpConcat                       // matches concatenation of Subs
	OpAlternate                    // matches alternation of Subs
)

// opPseudo is the value at which pseudo-ops start.
const opPseudo = Op(128)
|
|
|
|
// Equal returns true if x and y have identical structure.
|
|
func (x *Regexp) Equal(y *Regexp) bool {
|
|
if x == nil || y == nil {
|
|
return x == y
|
|
}
|
|
if x.Op != y.Op {
|
|
return false
|
|
}
|
|
switch x.Op {
|
|
case OpEndText:
|
|
// The parse flags remember whether this is \z or \Z.
|
|
if x.Flags&WasDollar != y.Flags&WasDollar {
|
|
return false
|
|
}
|
|
|
|
case OpLiteral, OpCharClass:
|
|
if len(x.Rune) != len(y.Rune) {
|
|
return false
|
|
}
|
|
for i, r := range x.Rune {
|
|
if r != y.Rune[i] {
|
|
return false
|
|
}
|
|
}
|
|
|
|
case OpAlternate, OpConcat:
|
|
if len(x.Sub) != len(y.Sub) {
|
|
return false
|
|
}
|
|
for i, sub := range x.Sub {
|
|
if !sub.Equal(y.Sub[i]) {
|
|
return false
|
|
}
|
|
}
|
|
|
|
case OpStar, OpPlus, OpQuest:
|
|
if x.Flags&NonGreedy != y.Flags&NonGreedy || !x.Sub[0].Equal(y.Sub[0]) {
|
|
return false
|
|
}
|
|
|
|
case OpRepeat:
|
|
if x.Flags&NonGreedy != y.Flags&NonGreedy || x.Min != y.Min || x.Max != y.Max || !x.Sub[0].Equal(y.Sub[0]) {
|
|
return false
|
|
}
|
|
|
|
case OpCapture:
|
|
if x.Cap != y.Cap || x.Name != y.Name || !x.Sub[0].Equal(y.Sub[0]) {
|
|
return false
|
|
}
|
|
}
|
|
return true
|
|
}
|
|
|
|
// writeRegexp writes the Perl syntax for the regular expression re to b.
|
|
func writeRegexp(b *bytes.Buffer, re *Regexp) {
|
|
switch re.Op {
|
|
default:
|
|
b.WriteString("<invalid op" + strconv.Itoa(int(re.Op)) + ">")
|
|
case OpNoMatch:
|
|
b.WriteString(`[^\x00-\x{10FFFF}]`)
|
|
case OpEmptyMatch:
|
|
b.WriteString(`(?:)`)
|
|
case OpLiteral:
|
|
if re.Flags&FoldCase != 0 {
|
|
b.WriteString(`(?i:`)
|
|
}
|
|
for _, r := range re.Rune {
|
|
escape(b, r, false)
|
|
}
|
|
if re.Flags&FoldCase != 0 {
|
|
b.WriteString(`)`)
|
|
}
|
|
case OpCharClass:
|
|
if len(re.Rune)%2 != 0 {
|
|
b.WriteString(`[invalid char class]`)
|
|
break
|
|
}
|
|
b.WriteRune('[')
|
|
if len(re.Rune) == 0 {
|
|
b.WriteString(`^\x00-\x{10FFFF}`)
|
|
} else if re.Rune[0] == 0 && re.Rune[len(re.Rune)-1] == unicode.MaxRune {
|
|
// Contains 0 and MaxRune. Probably a negated class.
|
|
// Print the gaps.
|
|
b.WriteRune('^')
|
|
for i := 1; i < len(re.Rune)-1; i += 2 {
|
|
lo, hi := re.Rune[i]+1, re.Rune[i+1]-1
|
|
escape(b, lo, lo == '-')
|
|
if lo != hi {
|
|
b.WriteRune('-')
|
|
escape(b, hi, hi == '-')
|
|
}
|
|
}
|
|
} else {
|
|
for i := 0; i < len(re.Rune); i += 2 {
|
|
lo, hi := re.Rune[i], re.Rune[i+1]
|
|
escape(b, lo, lo == '-')
|
|
if lo != hi {
|
|
b.WriteRune('-')
|
|
escape(b, hi, hi == '-')
|
|
}
|
|
}
|
|
}
|
|
b.WriteRune(']')
|
|
case OpAnyCharNotNL:
|
|
b.WriteString(`(?-s:.)`)
|
|
case OpAnyChar:
|
|
b.WriteString(`(?s:.)`)
|
|
case OpBeginLine:
|
|
b.WriteRune('^')
|
|
case OpEndLine:
|
|
b.WriteRune('$')
|
|
case OpBeginText:
|
|
b.WriteString(`\A`)
|
|
case OpEndText:
|
|
if re.Flags&WasDollar != 0 {
|
|
b.WriteString(`(?-m:$)`)
|
|
} else {
|
|
b.WriteString(`\z`)
|
|
}
|
|
case OpWordBoundary:
|
|
b.WriteString(`\b`)
|
|
case OpNoWordBoundary:
|
|
b.WriteString(`\B`)
|
|
case OpCapture:
|
|
if re.Name != "" {
|
|
b.WriteString(`(?P<`)
|
|
b.WriteString(re.Name)
|
|
b.WriteRune('>')
|
|
} else {
|
|
b.WriteRune('(')
|
|
}
|
|
if re.Sub[0].Op != OpEmptyMatch {
|
|
writeRegexp(b, re.Sub[0])
|
|
}
|
|
b.WriteRune(')')
|
|
case OpStar, OpPlus, OpQuest, OpRepeat:
|
|
if sub := re.Sub[0]; sub.Op > OpCapture || sub.Op == OpLiteral && len(sub.Rune) > 1 {
|
|
b.WriteString(`(?:`)
|
|
writeRegexp(b, sub)
|
|
b.WriteString(`)`)
|
|
} else {
|
|
writeRegexp(b, sub)
|
|
}
|
|
switch re.Op {
|
|
case OpStar:
|
|
b.WriteRune('*')
|
|
case OpPlus:
|
|
b.WriteRune('+')
|
|
case OpQuest:
|
|
b.WriteRune('?')
|
|
case OpRepeat:
|
|
b.WriteRune('{')
|
|
b.WriteString(strconv.Itoa(re.Min))
|
|
if re.Max != re.Min {
|
|
b.WriteRune(',')
|
|
if re.Max >= 0 {
|
|
b.WriteString(strconv.Itoa(re.Max))
|
|
}
|
|
}
|
|
b.WriteRune('}')
|
|
}
|
|
if re.Flags&NonGreedy != 0 {
|
|
b.WriteRune('?')
|
|
}
|
|
case OpConcat:
|
|
for _, sub := range re.Sub {
|
|
if sub.Op == OpAlternate {
|
|
b.WriteString(`(?:`)
|
|
writeRegexp(b, sub)
|
|
b.WriteString(`)`)
|
|
} else {
|
|
writeRegexp(b, sub)
|
|
}
|
|
}
|
|
case OpAlternate:
|
|
for i, sub := range re.Sub {
|
|
if i > 0 {
|
|
b.WriteRune('|')
|
|
}
|
|
writeRegexp(b, sub)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (re *Regexp) String() string {
|
|
var b bytes.Buffer
|
|
writeRegexp(&b, re)
|
|
return b.String()
|
|
}
|
|
|
|
// meta lists the printable characters that escape always prefixes with a
// backslash.
const meta = `\.+*?()|[]{}^$`

// escape writes the rune r to b in escaped form.
//
// A printable rune is written as itself, preceded by a backslash when it
// appears in meta or when force is set. A non-printable rune is written
// as one of the escapes \a, \f, \n, \r, \t, \v when it has one, as \xNN
// (two hex digits) when it is below 0x100, and as \x{N...} otherwise.
func escape(b *bytes.Buffer, r rune, force bool) {
	if unicode.IsPrint(r) {
		if force || strings.ContainsRune(meta, r) {
			b.WriteByte('\\')
		}
		b.WriteRune(r)
		return
	}

	// Non-printable runes that have a short backslash escape.
	short := ""
	switch r {
	case '\a':
		short = `\a`
	case '\f':
		short = `\f`
	case '\n':
		short = `\n`
	case '\r':
		short = `\r`
	case '\t':
		short = `\t`
	case '\v':
		short = `\v`
	}
	if short != "" {
		b.WriteString(short)
		return
	}

	// Everything else is written in hexadecimal.
	hex := strconv.FormatInt(int64(r), 16)
	if r >= 0x100 {
		b.WriteString(`\x{`)
		b.WriteString(hex)
		b.WriteString(`}`)
		return
	}
	b.WriteString(`\x`)
	if len(hex) == 1 {
		// Pad to two digits.
		b.WriteByte('0')
	}
	b.WriteString(hex)
}
|
|
|
|
// MaxCap walks the regexp to find the maximum capture index.
|
|
func (re *Regexp) MaxCap() int {
|
|
m := 0
|
|
if re.Op == OpCapture {
|
|
m = re.Cap
|
|
}
|
|
for _, sub := range re.Sub {
|
|
if n := sub.MaxCap(); m < n {
|
|
m = n
|
|
}
|
|
}
|
|
return m
|
|
}
|
|
|
|
// CapNames walks the regexp to find the names of capturing groups.
|
|
func (re *Regexp) CapNames() []string {
|
|
names := make([]string, re.MaxCap()+1)
|
|
re.capNames(names)
|
|
return names
|
|
}
|
|
|
|
func (re *Regexp) capNames(names []string) {
|
|
if re.Op == OpCapture {
|
|
names[re.Cap] = re.Name
|
|
}
|
|
for _, sub := range re.Sub {
|
|
sub.capNames(names)
|
|
}
|
|
}
|