From 6f00f89efac01e5729babb928893ec4990be5caf Mon Sep 17 00:00:00 2001 From: Jeremy Latt Date: Wed, 26 Feb 2014 13:11:29 -0800 Subject: [PATCH] relax unicode parsing rules NFKC was previously used for all text. Now, we use NFKC for all args but the last, which may be free text. This arg is normalized with NFC to allow for formatting characters. --- irc/commands.go | 6 ++++-- irc/constants.go | 2 +- irc/socket.go | 3 +-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/irc/commands.go b/irc/commands.go index a255e3c6..2a75324c 100644 --- a/irc/commands.go +++ b/irc/commands.go @@ -2,6 +2,7 @@ package irc import ( "code.google.com/p/go.crypto/bcrypt" + "code.google.com/p/go.text/unicode/norm" "errors" "fmt" "regexp" @@ -99,10 +100,11 @@ var ( func parseLine(line string) (StringCode, []string) { var parts []string if colonIndex := strings.IndexRune(line, ':'); colonIndex >= 0 { - lastArg := line[colonIndex+len(":"):] - line = line[:colonIndex-len(" ")] + lastArg := norm.NFC.String(line[colonIndex+len(":"):]) + line = norm.NFKC.String(line[:colonIndex-len(" ")]) parts = append(spacesExpr.Split(line, -1), lastArg) } else { + line = norm.NFKC.String(line) parts = spacesExpr.Split(line, -1) } return StringCode(strings.ToUpper(parts[0])), parts[1:] diff --git a/irc/constants.go b/irc/constants.go index 3fd5ddd5..49bf4c93 100644 --- a/irc/constants.go +++ b/irc/constants.go @@ -23,7 +23,7 @@ var ( ) const ( - SEM_VER = "ergonomadic-1.2.5" + SEM_VER = "ergonomadic-1.2.6" CRLF = "\r\n" MAX_REPLY_LEN = 512 - len(CRLF) diff --git a/irc/socket.go b/irc/socket.go index 7aa8d93e..a93e7a5b 100644 --- a/irc/socket.go +++ b/irc/socket.go @@ -2,7 +2,6 @@ package irc import ( "bufio" - "code.google.com/p/go.text/unicode/norm" "io" "log" "net" @@ -24,7 +23,7 @@ type Socket struct { func NewSocket(conn net.Conn, commands chan<- editableCommand) *Socket { socket := &Socket{ conn: conn, - reader: bufio.NewReader(norm.NFKC.Reader(conn)), + reader: bufio.NewReader(conn), writer: bufio.NewWriter(conn), }