From a4f9e08a850256ce24103b368fa9306ef1e85c25 Mon Sep 17 00:00:00 2001 From: Shivaram Lingamneni Date: Mon, 22 Jun 2020 14:54:43 -0400 Subject: [PATCH 1/6] fix #1151 --- conventional.yaml | 7 ++++++- default.yaml | 7 ++++++- irc/client.go | 7 ++++++- irc/commands.go | 5 +++++ irc/config.go | 1 + irc/errors.go | 1 + irc/handlers.go | 6 ++++++ irc/ircconn.go | 9 ++++++--- irc/server.go | 3 +++ irc/socket.go | 3 +++ irc/strings.go | 2 ++ 11 files changed, 45 insertions(+), 6 deletions(-) diff --git a/conventional.yaml b/conventional.yaml index 574b6997..d1c1afab 100644 --- a/conventional.yaml +++ b/conventional.yaml @@ -100,7 +100,7 @@ server: # casemapping controls what kinds of strings are permitted as identifiers (nicknames, # channel names, account names, etc.), and how they are normalized for case. - # with the recommended default of 'precis', utf-8 identifiers that are "sane" + # with the recommended default of 'precis', UTF8 identifiers that are "sane" # (according to RFC 8265) are allowed, and the server additionally tries to protect # against confusable characters ("homoglyph attacks"). # the other options are 'ascii' (traditional ASCII-only identifiers), and 'permissive', @@ -110,6 +110,11 @@ server: # already up and running is problematic). casemapping: "precis" + # enforce-utf8 controls whether the server allows non-UTF8 bytes in messages + # (as in traditional IRC) or preemptively discards non-UTF8 messages (since + # they cannot be relayed to websocket clients). + enforce-utf8: true + # whether to look up user hostnames with reverse DNS. # (disabling this will expose user IPs instead of hostnames; # to make IP/hostname information private, see the ip-cloaking section) diff --git a/default.yaml b/default.yaml index d161462c..a075b3d7 100644 --- a/default.yaml +++ b/default.yaml @@ -126,7 +126,7 @@ server: # casemapping controls what kinds of strings are permitted as identifiers (nicknames, # channel names, account names, etc.), and how they are normalized for case. - # with the recommended default of 'precis', utf-8 identifiers that are "sane" + # with the recommended default of 'precis', UTF8 identifiers that are "sane" # (according to RFC 8265) are allowed, and the server additionally tries to protect # against confusable characters ("homoglyph attacks"). # the other options are 'ascii' (traditional ASCII-only identifiers), and 'permissive', @@ -136,6 +136,11 @@ server: # already up and running is problematic). casemapping: "precis" + # enforce-utf8 controls whether the server allows non-UTF8 bytes in messages + # (as in traditional IRC) or preemptively discards non-UTF8 messages (since + # they cannot be relayed to websocket clients). + enforce-utf8: true + # whether to look up user hostnames with reverse DNS. # (disabling this will expose user IPs instead of hostnames; # to make IP/hostname information private, see the ip-cloaking section) diff --git a/irc/client.go b/irc/client.go index 4a36e926..f934d001 100644 --- a/irc/client.go +++ b/irc/client.go @@ -615,8 +615,11 @@ func (client *Client) run(session *Session) { firstLine := !isReattach for { + var invalidUtf8 bool line, err := session.socket.Read() - if err != nil { + if err == errInvalidUtf8 { + invalidUtf8 = true // handle as normal, including labeling + } else if err != nil { quitMessage := "connection closed" if err == errReadQ { quitMessage = "readQ exceeded" @@ -676,6 +679,8 @@ func (client *Client) run(session *Session) { cmd, exists := Commands[msg.Command] if !exists { cmd = unknownCommand + } else if invalidUtf8 { + cmd = invalidUtf8Command } isExiting := cmd.Run(client.server, client, session, msg) diff --git a/irc/commands.go b/irc/commands.go index 01423c65..0870c80b 100644 --- a/irc/commands.go +++ b/irc/commands.go @@ -79,6 +79,11 @@ var unknownCommand = Command{ usablePreReg: true, } +var invalidUtf8Command = Command{ + handler: invalidUtf8Handler, + usablePreReg: true, +} + // Commands holds all commands executable by a client connected to us. var Commands map[string]Command diff --git a/irc/config.go b/irc/config.go index 3e8723f4..7f3373d8 100644 --- a/irc/config.go +++ b/irc/config.go @@ -518,6 +518,7 @@ type Config struct { supportedCaps *caps.Set capValues caps.Values Casemapping Casemapping + EnforceUtf8 bool `yaml:"enforce-utf8"` OutputPath string `yaml:"output-path"` } diff --git a/irc/errors.go b/irc/errors.go index 6f199677..09afdf8a 100644 --- a/irc/errors.go +++ b/irc/errors.go @@ -66,6 +66,7 @@ var ( errCredsExternallyManaged = errors.New("Credentials are externally managed and cannot be changed here") errInvalidMultilineBatch = errors.New("Invalid multiline batch") errTimedOut = errors.New("Operation timed out") + errInvalidUtf8 = errors.New("Message rejected for invalid utf8") ) // Socket Errors diff --git a/irc/handlers.go b/irc/handlers.go index c4f9d515..d4ae95e6 100644 --- a/irc/handlers.go +++ b/irc/handlers.go @@ -2927,3 +2927,9 @@ func unknownCommandHandler(server *Server, client *Client, msg ircmsg.IrcMessage rb.Add(nil, server.name, ERR_UNKNOWNCOMMAND, client.Nick(), utils.SafeErrorParam(msg.Command), client.t("Unknown command")) return false } + +// fake handler for invalid utf8 +func invalidUtf8Handler(server *Server, client *Client, msg ircmsg.IrcMessage, rb *ResponseBuffer) bool { + rb.Add(nil, server.name, ERR_UNKNOWNERROR, client.Nick(), utils.SafeErrorParam(msg.Command), client.t("Message rejected for containing invalid UTF-8")) + return false +} diff --git a/irc/ircconn.go b/irc/ircconn.go index bf53bad4..87f9b584 100644 --- a/irc/ircconn.go +++ b/irc/ircconn.go @@ -77,6 +77,9 @@ func (cc *IRCStreamConn) ReadLine() (line []byte, err error) { return nil, errReadQ } line = bytes.TrimSuffix(line, crlf) + if globalUtf8EnforcementSetting && !utf8.Valid(line) { + err = errInvalidUtf8 + } return } @@ -101,9 +104,9 @@ func (wc IRCWSConn) UnderlyingConn() *utils.WrappedConn { func (wc IRCWSConn) WriteLine(buf []byte) (err error) { buf = bytes.TrimSuffix(buf, crlf) - // there's not much we can do about this; - // silently drop the message - if !utf8.Valid(buf) { + if !globalUtf8EnforcementSetting && !utf8.Valid(buf) { + // there's not much we can do about this; + // silently drop the message return nil } return wc.conn.WriteMessage(websocket.TextMessage, buf) diff --git a/irc/server.go b/irc/server.go index a6f2c168..98725553 100644 --- a/irc/server.go +++ b/irc/server.go @@ -487,6 +487,7 @@ func (server *Server) applyConfig(config *Config) (err error) { server.name = config.Server.Name server.nameCasefolded = config.Server.nameCasefolded globalCasemappingSetting = config.Server.Casemapping + globalUtf8EnforcementSetting = config.Server.EnforceUtf8 } else { // enforce configs that can't be changed after launch: if server.name != config.Server.Name { @@ -495,6 +496,8 @@ func (server *Server) applyConfig(config *Config) (err error) { return fmt.Errorf("Datastore path cannot be changed after launching the server, rehash aborted") } else if globalCasemappingSetting != config.Server.Casemapping { return fmt.Errorf("Casemapping cannot be changed after launching the server, rehash aborted") + } else if globalUtf8EnforcementSetting != config.Server.EnforceUtf8 { + return fmt.Errorf("UTF-8 enforcement cannot be changed after launching the server, rehash aborted") } else if oldConfig.Accounts.Multiclient.AlwaysOn != config.Accounts.Multiclient.AlwaysOn { return fmt.Errorf("Default always-on setting cannot be changed after launching the server, rehash aborted") } diff --git a/irc/socket.go b/irc/socket.go index 289b7e2e..6304a1a4 100644 --- a/irc/socket.go +++ b/irc/socket.go @@ -75,6 +75,9 @@ func (socket *Socket) Read() (string, error) { if err == io.EOF && strings.TrimSpace(line) != "" { // don't do anything + } else if err == errInvalidUtf8 { + // pass the data through so we can parse the command at least + return line, err } else if err != nil { return "", err } diff --git a/irc/strings.go b/irc/strings.go index 6c8100f6..7f3f6076 100644 --- a/irc/strings.go +++ b/irc/strings.go @@ -50,6 +50,8 @@ const ( // this happens-before all IRC connections and all casefolding operations. var globalCasemappingSetting Casemapping = CasemappingPRECIS +var globalUtf8EnforcementSetting bool + // Each pass of PRECIS casefolding is a composition of idempotent operations, // but not idempotent itself. Therefore, the spec says "do it four times and hope // it converges" (lolwtf). Golang's PRECIS implementation has a "repeat" option, From 21e604860f73f67648e666d6a1ffd72271ef7e10 Mon Sep 17 00:00:00 2001 From: Shivaram Lingamneni Date: Mon, 22 Jun 2020 15:56:47 -0400 Subject: [PATCH 2/6] add an explanatory comment --- irc/strings.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/irc/strings.go b/irc/strings.go index 7f3f6076..35ab98cf 100644 --- a/irc/strings.go +++ b/irc/strings.go @@ -50,6 +50,10 @@ const ( // this happens-before all IRC connections and all casefolding operations. var globalCasemappingSetting Casemapping = CasemappingPRECIS +// XXX analogous unsynchronized global variable controlling utf8 validation +// if this is off, you get the traditional IRC behavior (relaying any valid RFC1459 +// octets) and invalid utf8 messages are silently dropped for websocket clients only. +// if this is on, invalid utf8 inputs get an ERR_UNKNOWNERROR. var globalUtf8EnforcementSetting bool // Each pass of PRECIS casefolding is a composition of idempotent operations, From 58d3d1276f8f280c7c6d767074267a130509877a Mon Sep 17 00:00:00 2001 From: Shivaram Lingamneni Date: Mon, 22 Jun 2020 18:53:54 -0400 Subject: [PATCH 3/6] review fix --- irc/handlers.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/irc/handlers.go b/irc/handlers.go index d4ae95e6..805b8121 100644 --- a/irc/handlers.go +++ b/irc/handlers.go @@ -2930,6 +2930,6 @@ func unknownCommandHandler(server *Server, client *Client, msg ircmsg.IrcMessage // fake handler for invalid utf8 func invalidUtf8Handler(server *Server, client *Client, msg ircmsg.IrcMessage, rb *ResponseBuffer) bool { - rb.Add(nil, server.name, ERR_UNKNOWNERROR, client.Nick(), utils.SafeErrorParam(msg.Command), client.t("Message rejected for containing invalid UTF-8")) + rb.Add(nil, server.name, "FAIL", utils.SafeErrorParam(msg.Command), "INVALID_UTF8", client.t("Message rejected for containing invalid UTF-8")) return false } From be138e4d71a220fdf3378d7d504fdbaa2463b57c Mon Sep 17 00:00:00 2001 From: Shivaram Lingamneni Date: Mon, 22 Jun 2020 22:34:09 -0400 Subject: [PATCH 4/6] make comment consistent with review fix --- irc/strings.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/irc/strings.go b/irc/strings.go index 35ab98cf..f1f6ccf5 100644 --- a/irc/strings.go +++ b/irc/strings.go @@ -53,7 +53,7 @@ var globalCasemappingSetting Casemapping = CasemappingPRECIS // XXX analogous unsynchronized global variable controlling utf8 validation // if this is off, you get the traditional IRC behavior (relaying any valid RFC1459 // octets) and invalid utf8 messages are silently dropped for websocket clients only. -// if this is on, invalid utf8 inputs get an ERR_UNKNOWNERROR. +// if this is on, invalid utf8 inputs get a FAIL reply. var globalUtf8EnforcementSetting bool // Each pass of PRECIS casefolding is a composition of idempotent operations, From 28a0ec86b58ba0721962a0a4cdba06eb38f899dd Mon Sep 17 00:00:00 2001 From: Shivaram Lingamneni Date: Tue, 23 Jun 2020 03:18:49 -0400 Subject: [PATCH 5/6] simplify Socket.Read --- irc/socket.go | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/irc/socket.go b/irc/socket.go index 6304a1a4..f4d6844e 100644 --- a/irc/socket.go +++ b/irc/socket.go @@ -7,7 +7,6 @@ package irc import ( "errors" "io" - "strings" "sync" "github.com/oragono/oragono/irc/utils" @@ -59,30 +58,24 @@ func (socket *Socket) Close() { // Read returns a single IRC line from a Socket. func (socket *Socket) Read() (string, error) { + // immediately fail if Close() has been called, even if there's + // still data in a bufio.Reader or websocket buffer: if socket.IsClosed() { return "", io.EOF } lineBytes, err := socket.conn.ReadLine() - - // convert bytes to string line := string(lineBytes) - // read last message properly (such as ERROR/QUIT/etc), just fail next reads/writes if err == io.EOF { socket.Close() + // process last message properly (such as ERROR/QUIT/etc), just fail next reads/writes + if line != "" { + err = nil + } } - if err == io.EOF && strings.TrimSpace(line) != "" { - // don't do anything - } else if err == errInvalidUtf8 { - // pass the data through so we can parse the command at least - return line, err - } else if err != nil { - return "", err - } - - return line, nil + return line, err } // Write sends the given string out of Socket. Requirements: From 8cadc7340ac3267e5bc96a22d609875b188fa840 Mon Sep 17 00:00:00 2001 From: Shivaram Lingamneni Date: Tue, 23 Jun 2020 03:19:08 -0400 Subject: [PATCH 6/6] don't trim line endings in IRCStreamConn Doesn't help if the line ends with regular \n only, and the parser has to account for \r and \n anyway --- irc/ircconn.go | 1 - 1 file changed, 1 deletion(-) diff --git a/irc/ircconn.go b/irc/ircconn.go index 87f9b584..b70a7984 100644 --- a/irc/ircconn.go +++ b/irc/ircconn.go @@ -76,7 +76,6 @@ func (cc *IRCStreamConn) ReadLine() (line []byte, err error) { if isPrefix { return nil, errReadQ } - line = bytes.TrimSuffix(line, crlf) if globalUtf8EnforcementSetting && !utf8.Valid(line) { err = errInvalidUtf8 }