mirror of
https://github.com/jeremyd/ergo.git
synced 2026-06-29 21:52:05 -07:00
fix #1003
This commit is contained in:
+38
-6
@@ -11,21 +11,53 @@ import (
|
||||
|
||||
// yet another glob implementation in Go
|
||||
|
||||
func CompileGlob(glob string) (result *regexp.Regexp, err error) {
|
||||
var buf bytes.Buffer
|
||||
buf.WriteByte('^')
|
||||
// addRegexp translates glob into regular-expression syntax and appends the
// translation to buf. It writes no anchors; callers add `^`/`$` themselves.
//
// Translation rules:
//   - `*` becomes `.*` (or the capturing `(.*)` when submatch is true)
//   - `?` becomes `.`  (or the capturing `(.)` when submatch is true)
//   - every other rune is escaped with regexp.QuoteMeta and matched literally
//
// A rune that decodes as U+FFFD aborts the translation with a *syntax.Error
// carrying syntax.ErrInvalidUTF8. On that error path buf may already contain
// the translation of the runes that preceded the offending one.
func addRegexp(buf *bytes.Buffer, glob string, submatch bool) (err error) {
	// Choose the wildcard translations once rather than re-testing submatch
	// for every wildcard rune.
	star, question := ".*", "."
	if submatch {
		star, question = "(.*)", "(.)"
	}

	for _, r := range glob {
		switch r {
		case '*':
			buf.WriteString(star)
		case '?':
			buf.WriteString(question)
		case 0xFFFD:
			// Ranging over a string yields U+FFFD for bytes that are not
			// valid UTF-8 (a literal U+FFFD in the glob is rejected too).
			return &syntax.Error{Code: syntax.ErrInvalidUTF8, Expr: glob}
		default:
			buf.WriteString(regexp.QuoteMeta(string(r)))
		}
	}
	return nil
}
|
||||
|
||||
func CompileGlob(glob string, submatch bool) (result *regexp.Regexp, err error) {
|
||||
var buf bytes.Buffer
|
||||
buf.WriteByte('^')
|
||||
err = addRegexp(&buf, glob, submatch)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
buf.WriteByte('$')
|
||||
return regexp.Compile(buf.String())
|
||||
}
|
||||
|
||||
func CompileMasks(masks []string) (result *regexp.Regexp, err error) {
|
||||
var buf bytes.Buffer
|
||||
buf.WriteString("^(")
|
||||
for i, mask := range masks {
|
||||
err = addRegexp(&buf, mask, false)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
if i != len(masks)-1 {
|
||||
buf.WriteByte('|')
|
||||
}
|
||||
}
|
||||
buf.WriteString(")$")
|
||||
return regexp.Compile(buf.String())
|
||||
}
|
||||
|
||||
+121
-1
@@ -9,7 +9,7 @@ import (
|
||||
)
|
||||
|
||||
func globMustCompile(glob string) *regexp.Regexp {
|
||||
re, err := CompileGlob(glob)
|
||||
re, err := CompileGlob(glob, false)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
@@ -46,3 +46,123 @@ func TestGlob(t *testing.T) {
|
||||
assertMatches("S*e", "Skåne", true, t)
|
||||
assertMatches("Sk?ne", "Skåne", true, t)
|
||||
}
|
||||
|
||||
func BenchmarkGlob(b *testing.B) {
|
||||
g := globMustCompile("https://*google.com")
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
g.MatchString("https://www.google.com")
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkGlobCompilation(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
CompileGlob("https://*google.com", false)
|
||||
}
|
||||
}
|
||||
|
||||
// bans is the shared fixture for the mask tests and benchmarks below.
// These are actual bans from my production network :-/
var bans = []string{
	"*!*@tor-network.onion",
	"`!*@*",
	"qanon!*@*",
	"*!bibi@tor-network.onion",
	"shivarm!*@*",
	"8====d!*@*",
	"shiviram!*@*",
	"poop*!*@*",
	"shivoram!*@*",
	"shivvy!*@*",
	"shavirim!*@*",
	"shivarm_!*@*",
	"_!*@*",
}
|
||||
|
||||
func TestMasks(t *testing.T) {
|
||||
matcher, err := CompileMasks(bans)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
if !matcher.MatchString("evan!user@tor-network.onion") {
|
||||
t.Errorf("match expected")
|
||||
}
|
||||
if !matcher.MatchString("`!evan@b9un4fv3he44q.example.com") {
|
||||
t.Errorf("match expected")
|
||||
}
|
||||
if matcher.MatchString("horse!horse@t5dwi8vacg47y.example.com") {
|
||||
t.Errorf("match not expected")
|
||||
}
|
||||
if matcher.MatchString("horse_!horse@t5dwi8vacg47y.example.com") {
|
||||
t.Errorf("match not expected")
|
||||
}
|
||||
if matcher.MatchString("shivaram!shivaram@yrqgsrjy2p7my.example.com") {
|
||||
t.Errorf("match not expected")
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkMasksCompile(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
CompileMasks(bans)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkMasksMatch(b *testing.B) {
|
||||
matcher, _ := CompileMasks(bans)
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
matcher.MatchString("evan!user@tor-network.onion")
|
||||
matcher.MatchString("horse_!horse@t5dwi8vacg47y.example.com")
|
||||
matcher.MatchString("shivaram!shivaram@yrqgsrjy2p7my.example.com")
|
||||
}
|
||||
}
|
||||
|
||||
// compare performance to compilation of the | clauses as separate regexes
|
||||
// first for compilation, then for matching
|
||||
|
||||
func compileAll(masks []string) (result []*regexp.Regexp, err error) {
|
||||
a := make([]*regexp.Regexp, 0, len(masks))
|
||||
for _, mask := range masks {
|
||||
m, err := CompileGlob(mask, false)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
a = append(a, m)
|
||||
}
|
||||
return a, nil
|
||||
}
|
||||
|
||||
// matchesAny reports whether str is matched by at least one regexp in masks.
// Regexps are tried in order and the search stops at the first match. An
// empty or nil masks slice yields false; nil entries are skipped rather than
// dereferenced.
func matchesAny(masks []*regexp.Regexp, str string) bool {
	for _, r := range masks {
		if r == nil {
			continue
		}
		if r.MatchString(str) {
			return true
		}
	}
	return false
}
|
||||
|
||||
func BenchmarkLinearCompile(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
compileAll(bans)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkLinearMatch(b *testing.B) {
|
||||
a, err := compileAll(bans)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
if matchesAny(a, "horse_!horse@t5dwi8vacg47y.example.com") {
|
||||
panic("incorrect match")
|
||||
}
|
||||
if !matchesAny(a, "evan!user@tor-network.onion") {
|
||||
panic("incorrect match")
|
||||
}
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
matchesAny(a, "horse_!horse@t5dwi8vacg47y.example.com")
|
||||
matchesAny(a, "evan!user@tor-network.onion")
|
||||
matchesAny(a, "shivaram!shivaram@yrqgsrjy2p7my.example.com")
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user