From 397b61df257f72a8ce90792985f76497ba735da4 Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Tue, 20 Feb 2007 23:02:35 +0000 Subject: Use ASCII KCODE to prevent problems like missing characters or matching failures when clients send messages in something other than UTF-8 --- bin/rbot | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'bin/rbot') diff --git a/bin/rbot b/bin/rbot index 5872e3e1..b7a6207f 100755 --- a/bin/rbot +++ b/bin/rbot @@ -21,7 +21,16 @@ # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -$KCODE = 'u' +# Most of the string processing across rbot is done against IRC messages, which +# do not have a well-defined encoding. Although many clients are now using +# UTF-8, there is no guarantee that an arbitrary string received from IRC will +# be UTF-8 encoded. We have to force ASCII (byte-wise/charset-agnostic) +# matching because otherwise some strings can cause problems: for +# example, the byte sequence "\340\350\354\362\371" (that is, the vowels aeiou, +# each with a grave accent) will cause the string to be considered only up to the +# "\354" (i with grave accent), so either the rest of the message is +# ignored, or the matching fails. +$KCODE = 'a' $VERBOSE=true -- cgit v1.2.3