Source for file CompressStr.php
Documentation is available at CompressStr.php
* ----------------------------------------------------------------------
* Copyright (c) 2006-2016 Khaled Al-Sham'aa.
* ----------------------------------------------------------------------
* This program is open source product; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License (LGPL)
* as published by the Free Software Foundation; either version 3
* of the License, or (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/lgpl.txt>.
* ----------------------------------------------------------------------
* Class Name: Compress string using Huffman-like coding
* Filename: CompressStr.php
* Original Author(s): Khaled Al-Sham'aa <khaled@ar-php.org>
* Purpose: This class compresses a given string into binary format
* using a variable-length code table (derived in a particular way
* based on the estimated probability of occurrence for each
* possible value of the source symbol) to encode each source symbol
* ----------------------------------------------------------------------
* Arabic Compress String Class
* Compress string using Huffman-like coding
* This class compresses text strings into roughly 70% of their original size
* by using compact codes for the most frequently used letters in a given
* language. The algorithm is tied to the language of the text, so you will
* find 6 different classes for the following languages: Arabic, English,
* French, German, Italian and Spanish.
* Benefits of this compress algorithm include:
* - It is written in pure PHP code, so there is no need for any
* PHP extensions to use it.
* - You can search in a compressed string directly without uncompressing it
* - You can get the original string length directly without uncompressing it
* Unfortunately, text compressed using this algorithm loses the structure that
* normal zip algorithms rely on, so the benefit of using ZLib functions on this
* text will be reduced.
* There is another drawback: this algorithm works only on text from a given
* language; it does not work well on binary files like images or PDF.
* include('./I18N/Arabic.php');
* $obj = new I18N_Arabic('CompressStr');
* $obj->setInputCharset('windows-1256');
* $obj->setOutputCharset('windows-1256');
* $file = 'Compress/ar_example.txt';
* $fh = fopen($file, 'r');
* $str = fread($fh, filesize($file));
* $zip = $obj->compress($str);
* $before = strlen($str);
* $rate = round($after * 100 / $before);
* echo "String size before was: $before Byte<br>";
* echo "Compressed string size after is: $after Byte<br>";
* echo "Rate $rate %<hr>";
* $str = $obj->decompress($zip);
* if ($obj->search($zip, $word)) {
* echo "Search for $word in zipped string and find it<hr>";
* echo "Search for $word in zipped string and do not find it<hr>";
* $len = $obj->length($zip);
* echo "Original length of zipped string is $len Byte<hr>";
* echo '<div dir="rtl" align="justify">'.nl2br($str).'</div>';
* @author Khaled Al-Sham'aa <khaled@ar-php.org>
* @copyright 2006-2016 Khaled Al-Sham'aa
* @license LGPL <http://www.gnu.org/licenses/lgpl.txt>
* @link http://www.ar-php.org
* This PHP class compresses Arabic strings using Huffman-like coding
* @author Khaled Al-Sham'aa <khaled@ar-php.org>
* @copyright 2006-2016 Khaled Al-Sham'aa
* @license LGPL <http://www.gnu.org/licenses/lgpl.txt>
* @link http://www.ar-php.org
* Loads initial values
public function __construct()
self::$_encode = iconv('utf-8', 'cp1256', ' الميوتة');
self::$_binary = '0000|0001|0010|0011|0100|0101|0110|0111|';
self::$_hex = '0123456789abcdef';
self::$_bin = '0000|0001|0010|0011|0100|0101|0110|0111|1000|';
self::$_bin = self::$_bin . '1001|1010|1011|1100|1101|1110|1111|';
* Set the required encode table and binary hash for the most probable
* characters in the selected language
* @param string $lang [en, fr, gr, it, sp, ar] Language profile selected
* @return object $this to build a fluent interface
* @author Khaled Al-Sham'aa <khaled@ar-php.org>
self::$_encode = ' etaoins';
self::$_encode = ' enasriu';
self::$_encode = ' enristu';
self::$_encode = ' eiaorln';
self::$_encode = ' eaosrin';
self::$_encode = iconv('utf-8', 'cp1256', ' الميوتة');
self::$_binary = '0000|0001|0010|0011|0100|0101|0110|0111|';
* Compress the given string using the Huffman-like coding
* @param string $str The text to compress
* @return binary The compressed string in binary format
* @author Khaled Al-Sham'aa <khaled@ar-php.org>
$str = iconv('utf-8', 'cp1256', $str);
$bits = self::str2bits($str);
$hex = self::bits2hex($bits);
* Uncompress a compressed string
* @param binary $bin The text compressed by compress().
* @return string The original uncompressed string
* @author Khaled Al-Sham'aa <khaled@ar-php.org>
$bits = self::hex2bits($bytes);
$str = self::bits2str($bits);
$str = iconv('cp1256', 'utf-8', $str);
* Search a compressed string for a given word
* @param binary $bin Compressed binary string
* @param string $word The string you are looking for
* @return boolean True if found and False if not found
* @author Khaled Al-Sham'aa <khaled@ar-php.org>
public static function search($bin, $word)
$word = iconv('utf-8', 'cp1256', $word);
$wBits = self::str2bits($word);
$bits = self::hex2bits($bytes);
* Retrieve the original string length
* @param binary $bin Compressed binary string
* @return integer Original string length
* @author Khaled Al-Sham'aa <khaled@ar-php.org>
public static function length($bin)
$bits = self::hex2bits($bytes);
while (isset ($bits[$i])) {
* Convert textual string into binary string
* @param string $str The textual string to convert
* @return binary The binary representation of textual string
* @author Khaled Al-Sham'aa <khaled@ar-php.org>
protected static function str2bits($str)
$pos = strpos(self::$_encode, $char);
$bits .= substr(self::$_binary, $pos* 5, 4);
$bits .= '1'. substr(self::$_bin, (int) ($int/ 16)* 5, 4);
$bits .= substr(self::$_bin, ($int% 16)* 5, 4);
$add = strlen($bits) % 4;
* Convert binary string into textual string
* @param binary $bits The binary string to convert
* @return string The textual representation of binary string
* @author Khaled Al-Sham'aa <khaled@ar-php.org>
protected static function bits2str($bits)
if ($bits || strlen($code) == 8) {
if ($bits || strlen($code) == 3) {
$pos = strpos(self::$_binary, "0$code|");
$str .= substr(self::$_encode, $pos/ 5, 1);
* Convert binary string into hexadecimal string
* @param binary $bits The binary string to convert
* @return hexadecimal The hexadecimal representation of binary string
* @author Khaled Al-Sham'aa <khaled@ar-php.org>
protected static function bits2hex($bits)
$total = strlen($bits) / 4;
for ($i = 0; $i < $total; $i++ ) {
$nibbel = substr($bits, $i* 4, 4);
$pos = strpos(self::$_bin, $nibbel);
$hex .= substr(self::$_hex, $pos/ 5, 1);
* Convert hexadecimal string into binary string
* @param hexadecimal $hex The hexadecimal string to convert
* @return binary The binary representation of hexadecimal string
* @author Khaled Al-Sham'aa <khaled@ar-php.org>
protected static function hex2bits($hex)
for ($i = 0; $i < $total; $i++ ) {
$pos = strpos(self::$_hex, $hex[$i]);
$bits .= substr(self::$_bin, $pos* 5, 4);
|