From 57c8e4ee7cea126e68ade4913c57a46f06d1a2b0 Mon Sep 17 00:00:00 2001
From: Michael Foster
Date: Sun, 28 Jul 2013 20:33:26 -0400
Subject: [PATCH] Strip combining characters from Unicode strings (eg. Zalgo)

---
Review notes (this section is ignored by `git am`):
 - post.php: the subject is now stripped from $post['subject']; it was being
   overwritten with the stripped name.
 - inc/functions.php: ordutf8() no longer reads an undefined $bytesnumber for
   bytes that cannot start a sequence, and strip_combining_chars() passes a
   real variable by reference and leaves non-UTF-8 input untouched.
 - The blob ids on the "index" lines of inc/functions.php and post.php predate
   these changes.

 inc/config.php    |  2 ++
 inc/functions.php | 72 ++++++++++++++++++++++++++++++++++++++++++++++-
 post.php          |  9 +++++-
 3 files changed, 81 insertions(+), 2 deletions(-)

diff --git a/inc/config.php b/inc/config.php
index 98e127b3..d7580a8b 100644
--- a/inc/config.php
+++ b/inc/config.php
@@ -282,6 +282,8 @@
 	$config['strip_superfluous_returns'] = true;
 	// Require an image for threads?
 	$config['force_image_op'] = true;
+	// Strip combining characters from Unicode strings (eg. "Zalgo")
+	$config['strip_combining_chars'] = true;
 
 	// Max body length
 	$config['max_body'] = 1800;
diff --git a/inc/functions.php b/inc/functions.php
index b5660664..9d398002 100644
--- a/inc/functions.php
+++ b/inc/functions.php
@@ -1526,7 +1526,77 @@ function utf8tohtml($utf8) {
 	return htmlspecialchars($utf8, ENT_NOQUOTES, 'UTF-8');
 }
 
-function buildThread($id, $return=false, $mod=false) {
+/**
+ * Decode the UTF-8 character that starts at byte $offset of $string.
+ *
+ * @param string $string UTF-8 encoded bytes.
+ * @param int $offset Byte offset to decode at. On return it points at the next
+ *                    character, or is -1 once the end of $string has been reached.
+ * @return int Code point of the character. A byte that cannot start a UTF-8
+ *             sequence (0x80-0xBF, 0xF8-0xFF) is returned as-is and consumes one byte.
+ */
+function ordutf8($string, &$offset) {
+	$code = ord(substr($string, $offset, 1));
+	if ($code >= 192 && $code < 248) { // otherwise a single byte is consumed
+		if ($code < 224)
+			$bytesnumber = 2; // 110xxxxx
+		else if ($code < 240)
+			$bytesnumber = 3; // 1110xxxx
+		else
+			$bytesnumber = 4; // 11110xxx
+		$codetemp = $code - 192 - ($bytesnumber > 2 ? 32 : 0) - ($bytesnumber > 3 ? 16 : 0);
+		for ($i = 2; $i <= $bytesnumber; $i++) {
+			$offset ++;
+			$code2 = ord(substr($string, $offset, 1)) - 128; //10xxxxxx
+			$codetemp = $codetemp*64 + $code2;
+		}
+		$code = $codetemp;
+	}
+	$offset += 1;
+	if ($offset >= strlen($string))
+		$offset = -1;
+	return $code;
+}
+
+/**
+ * Remove combining marks (the characters stacked up to produce "Zalgo" text) from a string.
+ *
+ * Code points in the following ranges are dropped:
+ *   U+0300-U+036F, U+1DC0-U+1DFF, U+20D0-U+20FF, U+FE20-U+FE2F
+ *
+ * @param string $str UTF-8 encoded string.
+ * @return string $str without those code points; $str is returned untouched if it
+ *                cannot be split into UTF-8 characters.
+ */
+function strip_combining_chars($str) {
+	$chars = preg_split('//u', $str, -1, PREG_SPLIT_NO_EMPTY);
+	if ($chars === false)
+		return $str; // not valid UTF-8; there are no characters to inspect
+
+	$str = '';
+	foreach ($chars as $char) {
+		// ordutf8() takes the offset by reference, so it has to be a real variable
+		$offset = 0;
+		$ord = ordutf8($char, $offset);
+
+		if ($ord >= 768 && $ord <= 879)
+			continue;
+
+		if ($ord >= 7616 && $ord <= 7679)
+			continue;
+
+		if ($ord >= 8400 && $ord <= 8447)
+			continue;
+
+		if ($ord >= 65056 && $ord <= 65071)
+			continue;
+
+		$str .= $char;
+	}
+	return $str;
+}
+
+function buildThread($id, $return = false, $mod = false) {
 	global $board, $config;
 	$id = round($id);
 
diff --git a/post.php b/post.php
index 4e3a1a9f..6855fe14 100644
--- a/post.php
+++ b/post.php
@@ -357,6 +357,13 @@ if (isset($_POST['delete'])) {
 		$post['thumb'] = $board['dir'] . $config['dir']['thumb'] . $post['file_id'] . '.' . ($config['thumb_ext'] ? $config['thumb_ext'] : $post['extension']);
 	}
 
+	if ($config['strip_combining_chars']) {
+		$post['name'] = strip_combining_chars($post['name']);
+		$post['email'] = strip_combining_chars($post['email']);
+		$post['subject'] = strip_combining_chars($post['subject']);
+		$post['body'] = strip_combining_chars($post['body']);
+	}
+
 	// Check string lengths
 	if (mb_strlen($post['name']) > 35)
 		error(sprintf($config['error']['toolong'], 'name'));
@@ -368,7 +375,7 @@ if (isset($_POST['delete'])) {
 		error($config['error']['toolong_body']);
 	if (mb_strlen($post['password']) > 20)
 		error(sprintf($config['error']['toolong'], 'password'));
-		
+	
 	wordfilters($post['body']);
 
 	$post['body_nomarkup'] = $post['body'];