diff --git a/conventional.yaml b/conventional.yaml index cc0a18d0..257f61a3 100644 --- a/conventional.yaml +++ b/conventional.yaml @@ -100,7 +100,7 @@ server: # casemapping controls what kinds of strings are permitted as identifiers (nicknames, # channel names, account names, etc.), and how they are normalized for case. - # with the recommended default of 'precis', utf-8 identifiers that are "sane" + # with the recommended default of 'precis', UTF8 identifiers that are "sane" # (according to RFC 8265) are allowed, and the server additionally tries to protect # against confusable characters ("homoglyph attacks"). # the other options are 'ascii' (traditional ASCII-only identifiers), and 'permissive', @@ -110,6 +110,11 @@ server: # already up and running is problematic). casemapping: "precis" + # enforce-utf8 controls whether the server allows non-UTF8 bytes in messages + # (as in traditional IRC) or preemptively discards non-UTF8 messages (since + # they cannot be relayed to websocket clients). + enforce-utf8: true + # whether to look up user hostnames with reverse DNS. # (disabling this will expose user IPs instead of hostnames; # to make IP/hostname information private, see the ip-cloaking section) diff --git a/default.yaml b/default.yaml index a7db127c..c0c768a9 100644 --- a/default.yaml +++ b/default.yaml @@ -126,7 +126,7 @@ server: # casemapping controls what kinds of strings are permitted as identifiers (nicknames, # channel names, account names, etc.), and how they are normalized for case. - # with the recommended default of 'precis', utf-8 identifiers that are "sane" + # with the recommended default of 'precis', UTF8 identifiers that are "sane" # (according to RFC 8265) are allowed, and the server additionally tries to protect # against confusable characters ("homoglyph attacks"). # the other options are 'ascii' (traditional ASCII-only identifiers), and 'permissive', @@ -136,6 +136,11 @@ server: # already up and running is problematic). casemapping: "precis" + # enforce-utf8 controls whether the server allows non-UTF8 bytes in messages + # (as in traditional IRC) or preemptively discards non-UTF8 messages (since + # they cannot be relayed to websocket clients). + enforce-utf8: true + # whether to look up user hostnames with reverse DNS. # (disabling this will expose user IPs instead of hostnames; # to make IP/hostname information private, see the ip-cloaking section) diff --git a/irc/client.go b/irc/client.go index 2dea35ab..2aae8dca 100644 --- a/irc/client.go +++ b/irc/client.go @@ -615,8 +615,11 @@ func (client *Client) run(session *Session) { firstLine := !isReattach for { + var invalidUtf8 bool line, err := session.socket.Read() - if err != nil { + if err == errInvalidUtf8 { + invalidUtf8 = true // handle as normal, including labeling + } else if err != nil { quitMessage := "connection closed" if err == errReadQ { quitMessage = "readQ exceeded" @@ -676,6 +679,8 @@ func (client *Client) run(session *Session) { cmd, exists := Commands[msg.Command] if !exists { cmd = unknownCommand + } else if invalidUtf8 { + cmd = invalidUtf8Command } isExiting := cmd.Run(client.server, client, session, msg) diff --git a/irc/commands.go b/irc/commands.go index 01423c65..0870c80b 100644 --- a/irc/commands.go +++ b/irc/commands.go @@ -79,6 +79,11 @@ var unknownCommand = Command{ usablePreReg: true, } +var invalidUtf8Command = Command{ + handler: invalidUtf8Handler, + usablePreReg: true, +} + // Commands holds all commands executable by a client connected to us. var Commands map[string]Command diff --git a/irc/config.go b/irc/config.go index bd002316..59bab09d 100644 --- a/irc/config.go +++ b/irc/config.go @@ -519,6 +519,7 @@ type Config struct { supportedCaps *caps.Set capValues caps.Values Casemapping Casemapping + EnforceUtf8 bool `yaml:"enforce-utf8"` OutputPath string `yaml:"output-path"` } diff --git a/irc/errors.go b/irc/errors.go index 6f199677..09afdf8a 100644 --- a/irc/errors.go +++ b/irc/errors.go @@ -66,6 +66,7 @@ var ( errCredsExternallyManaged = errors.New("Credentials are externally managed and cannot be changed here") errInvalidMultilineBatch = errors.New("Invalid multiline batch") errTimedOut = errors.New("Operation timed out") + errInvalidUtf8 = errors.New("Message rejected for invalid utf8") ) // Socket Errors diff --git a/irc/handlers.go b/irc/handlers.go index ed53f338..d733a6f9 100644 --- a/irc/handlers.go +++ b/irc/handlers.go @@ -2918,3 +2918,9 @@ func unknownCommandHandler(server *Server, client *Client, msg ircmsg.IrcMessage rb.Add(nil, server.name, ERR_UNKNOWNCOMMAND, client.Nick(), utils.SafeErrorParam(msg.Command), client.t("Unknown command")) return false } + +// fake handler for invalid utf8 +func invalidUtf8Handler(server *Server, client *Client, msg ircmsg.IrcMessage, rb *ResponseBuffer) bool { + rb.Add(nil, server.name, "FAIL", utils.SafeErrorParam(msg.Command), "INVALID_UTF8", client.t("Message rejected for containing invalid UTF-8")) + return false +} diff --git a/irc/ircconn.go b/irc/ircconn.go index bf53bad4..b70a7984 100644 --- a/irc/ircconn.go +++ b/irc/ircconn.go @@ -76,7 +76,9 @@ func (cc *IRCStreamConn) ReadLine() (line []byte, err error) { if isPrefix { return nil, errReadQ } - line = bytes.TrimSuffix(line, crlf) + if globalUtf8EnforcementSetting && !utf8.Valid(line) { + err = errInvalidUtf8 + } return } @@ -101,9 +103,9 @@ func (wc IRCWSConn) UnderlyingConn() *utils.WrappedConn { func (wc IRCWSConn) WriteLine(buf []byte) (err error) { buf = bytes.TrimSuffix(buf, crlf) - // there's not much we can do about this; - // silently drop the message - if !utf8.Valid(buf) { + if !globalUtf8EnforcementSetting && !utf8.Valid(buf) { + // there's not much we can do about this; + // silently drop the message return nil } return wc.conn.WriteMessage(websocket.TextMessage, buf) diff --git a/irc/server.go b/irc/server.go index a6f2c168..98725553 100644 --- a/irc/server.go +++ b/irc/server.go @@ -487,6 +487,7 @@ func (server *Server) applyConfig(config *Config) (err error) { server.name = config.Server.Name server.nameCasefolded = config.Server.nameCasefolded globalCasemappingSetting = config.Server.Casemapping + globalUtf8EnforcementSetting = config.Server.EnforceUtf8 } else { // enforce configs that can't be changed after launch: if server.name != config.Server.Name { @@ -495,6 +496,8 @@ func (server *Server) applyConfig(config *Config) (err error) { return fmt.Errorf("Datastore path cannot be changed after launching the server, rehash aborted") } else if globalCasemappingSetting != config.Server.Casemapping { return fmt.Errorf("Casemapping cannot be changed after launching the server, rehash aborted") + } else if globalUtf8EnforcementSetting != config.Server.EnforceUtf8 { + return fmt.Errorf("UTF-8 enforcement cannot be changed after launching the server, rehash aborted") } else if oldConfig.Accounts.Multiclient.AlwaysOn != config.Accounts.Multiclient.AlwaysOn { return fmt.Errorf("Default always-on setting cannot be changed after launching the server, rehash aborted") } diff --git a/irc/socket.go b/irc/socket.go index 289b7e2e..f4d6844e 100644 --- a/irc/socket.go +++ b/irc/socket.go @@ -7,7 +7,6 @@ package irc import ( "errors" "io" - "strings" "sync" "github.com/oragono/oragono/irc/utils" @@ -59,27 +58,24 @@ func (socket *Socket) Close() { // Read returns a single IRC line from a Socket. func (socket *Socket) Read() (string, error) { + // immediately fail if Close() has been called, even if there's + // still data in a bufio.Reader or websocket buffer: if socket.IsClosed() { return "", io.EOF } lineBytes, err := socket.conn.ReadLine() - - // convert bytes to string line := string(lineBytes) - // read last message properly (such as ERROR/QUIT/etc), just fail next reads/writes if err == io.EOF { socket.Close() + // process last message properly (such as ERROR/QUIT/etc), just fail next reads/writes + if line != "" { + err = nil + } } - if err == io.EOF && strings.TrimSpace(line) != "" { - // don't do anything - } else if err != nil { - return "", err - } - - return line, nil + return line, err } // Write sends the given string out of Socket. Requirements: diff --git a/irc/strings.go b/irc/strings.go index 6c8100f6..f1f6ccf5 100644 --- a/irc/strings.go +++ b/irc/strings.go @@ -50,6 +50,12 @@ const ( // this happens-before all IRC connections and all casefolding operations. var globalCasemappingSetting Casemapping = CasemappingPRECIS +// XXX analogous unsynchronized global variable controlling utf8 validation +// if this is off, you get the traditional IRC behavior (relaying any valid RFC1459 +// octets) and invalid utf8 messages are silently dropped for websocket clients only. +// if this is on, invalid utf8 inputs get a FAIL reply. +var globalUtf8EnforcementSetting bool + // Each pass of PRECIS casefolding is a composition of idempotent operations, // but not idempotent itself. Therefore, the spec says "do it four times and hope // it converges" (lolwtf). Golang's PRECIS implementation has a "repeat" option,