Source for file Glyphs.php
Documentation is available at Glyphs.php
* ----------------------------------------------------------------------
* Copyright (c) 2006-2016 Khaled Al-Sham'aa.
* ----------------------------------------------------------------------
* This program is open source product; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License (LGPL)
* as published by the Free Software Foundation; either version 3
* of the License, or (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/lgpl.txt>.
* ----------------------------------------------------------------------
* Class Name: Arabic Glyphs is a simple class to render Arabic text
* Original Author(s): Khaled Al-Sham'aa <khaled@ar-php.org>
* Purpose: This class takes Arabic text (encoded in Windows-1256 character
* set) as input, performs Arabic glyph joining on it, and outputs
* a UTF-8 hexadecimal stream that is no longer logically arranged
* but in visual order. This gives readable results with simple
* Unicode renderers such as the GD and UFPDF libraries, which do
* not yet handle the basic connecting glyphs of the Arabic language
* and simply output all stand-alone glyphs in left-to-right order.
* ----------------------------------------------------------------------
* Arabic Glyphs is a class to render Arabic text
* PHP class to render Arabic text by performing Arabic glyph joining on it,
* then outputting a UTF-8 hexadecimal stream that gives readable results in PHP
* libraries that support UTF-8.
* include('./I18N/Arabic.php');
* $obj = new I18N_Arabic('Glyphs');
* $text = $obj->utf8Glyphs($text);
* imagettftext($im, 20, 0, 200, 100, $black, $font, $text);
* @author Khaled Al-Sham'aa <khaled@ar-php.org>
* @copyright 2006-2016 Khaled Al-Sham'aa
* @license LGPL <http://www.gnu.org/licenses/lgpl.txt>
* @link http://www.ar-php.org
* This PHP class renders Arabic text by performing Arabic glyph joining on it
* @author Khaled Al-Sham'aa <khaled@ar-php.org>
* @copyright 2006-2016 Khaled Al-Sham'aa
* @license LGPL <http://www.gnu.org/licenses/lgpl.txt>
* @link http://www.ar-php.org
private $_prevLink = null;
private $_nextLink = null;
* Loads the initial values
// Constructor: fills the character sets and lookup tables used when choosing
// a glyph form for each character.
//
// Table layout: $this->_glyphs is a string of source characters and
// $this->_hex is a parallel string of 4-digit hexadecimal code points.
// Every table entry owns 16 hex digits (four 4-digit code points); the
// lookup elsewhere in this class reads substr($this->_hex, $pos*16 + $type*4, 4).
// NOTE(review): the four code points per entry look like the isolated, final,
// initial and medial presentation forms, in that order - confirm against the
// Unicode Arabic Presentation Forms-B chart.
public function __construct()
// Character set that the preceding character ($prevChar) is tested against.
$this->_prevLink = '،؟؛ـئبتثجحخسشصضطظعغفقكلمنهي';
// Character set that the following character is tested against.
$this->_nextLink = 'ـآأؤإائبةتثجحخدذرز';
$this->_nextLink .= 'سشصضطظعغفقكلمنهوىي';
// Diacritic marks (U+064B-U+0652 per the hex table below); the joining code
// looks past these when it searches for the previous character.
$this->_vowel = 'ًٌٍَُِّْ';
// NOTE(review): the array entries assigned on the next eight lines are
// discarded when $this->_glyphs is reassigned to a string right after them.
// They look like leftover or disabled code - confirm before relying on them.
$this->_glyphs['ً'] = array('FE70','FE71');
$this->_glyphs['ٌ'] = array('FE72','FE72');
$this->_glyphs['ٍ'] = array('FE74','FE74');
$this->_glyphs['َ'] = array('FE76','FE77');
$this->_glyphs['ُ'] = array('FE78','FE79');
$this->_glyphs['ِ'] = array('FE7A','FE7B');
$this->_glyphs['ّ'] = array('FE7C','FE7D');
$this->_glyphs['ْ'] = array('FE7E','FE7E');
// Group 1: nine diacritic marks; each one maps to its own code point
// repeated four times (9 entries x 16 hex digits).
$this->_glyphs = 'ًٌٍَُِّْٰ';
$this->_hex = '064B064B064B064B064C064C064C064C064D064D064D064D064E064E';
$this->_hex .= '064E064E064F064F064F064F06500650065006500651065106510651';
$this->_hex .= '06520652065206520670067006700670';
// Groups 2-6: eight characters each, 8 x 16 hex digits per group.
$this->_glyphs .= 'ءآأؤإئاب';
$this->_hex .= 'FE80FE80FE80FE80FE81FE82FE81FE82FE83FE84FE83FE84FE85FE86';
$this->_hex .= 'FE85FE86FE87FE88FE87FE88FE89FE8AFE8BFE8CFE8DFE8EFE8DFE8E';
$this->_hex .= 'FE8FFE90FE91FE92';
$this->_glyphs .= 'ةتثجحخدذ';
$this->_hex .= 'FE93FE94FE93FE94FE95FE96FE97FE98FE99FE9AFE9BFE9CFE9DFE9E';
$this->_hex .= 'FE9FFEA0FEA1FEA2FEA3FEA4FEA5FEA6FEA7FEA8FEA9FEAAFEA9FEAA';
$this->_hex .= 'FEABFEACFEABFEAC';
$this->_glyphs .= 'رزسشصضطظ';
$this->_hex .= 'FEADFEAEFEADFEAEFEAFFEB0FEAFFEB0FEB1FEB2FEB3FEB4FEB5FEB6';
$this->_hex .= 'FEB7FEB8FEB9FEBAFEBBFEBCFEBDFEBEFEBFFEC0FEC1FEC2FEC3FEC4';
$this->_hex .= 'FEC5FEC6FEC7FEC8';
$this->_glyphs .= 'عغفقكلمن';
$this->_hex .= 'FEC9FECAFECBFECCFECDFECEFECFFED0FED1FED2FED3FED4FED5FED6';
$this->_hex .= 'FED7FED8FED9FEDAFEDBFEDCFEDDFEDEFEDFFEE0FEE1FEE2FEE3FEE4';
$this->_hex .= 'FEE5FEE6FEE7FEE8';
// The last four characters of this group (tatweel and three punctuation
// marks) map to their own code point repeated four times.
$this->_glyphs .= 'هوىيـ،؟؛';
$this->_hex .= 'FEE9FEEAFEEBFEECFEEDFEEEFEEDFEEEFEEFFEF0FEEFFEF0FEF1FEF2';
$this->_hex .= 'FEF3FEF40640064006400640060C060C060C060C061F061F061F061F';
$this->_hex .= '061B061B061B061B';
// Support the extra 4 Persian letters (p), (ch), (zh) and (g)
// This needs value in getGlyphs function to be 52 instead of 48
// $this->_glyphs .= chr(129).chr(141).chr(142).chr(144);
// $this->_hex .= 'FB56FB57FB58FB59FB7AFB7BFB7CFB7DFB8AFB8BFB8AFB8BFB92';
// $this->_hex .= 'FB93FB94FB95';
// $this->_prevLink .= chr(129).chr(141).chr(142).chr(144);
// $this->_nextLink .= chr(129).chr(141).chr(142).chr(144);
// Example: $text = 'نمونة قلم: لاگچ ژافپ';
// Email Yossi Beck <yosbeck@gmail.com> ask him to save that example
// string using ANSI encoding in Notepad
// Final group: four two-character Lam-Alef sequences (eight characters in
// _glyphs) sharing four 16-digit entries in _hex, so the character offset
// of a ligature does not equal its entry index.
$this->_glyphs .= 'لآلألإلا';
$this->_hex .= 'FEF5FEF6FEF5FEF6FEF7FEF8FEF7FEF8FEF9FEFAFEF9FEFAFEFBFEFC';
$this->_hex .= 'FEFBFEFC';
* @param string $char Char
* @param integer $type Type
$pos = $pos* 16 + $type* 4;
return substr($this->_hex, $pos, 4);
* Convert Arabic Windows-1256 charset string into glyph joining in UTF-8 hexadecimal stream
* @param string $str Arabic string in Windows-1256 charset
* @return string Arabic glyph joining in UTF-8 hexadecimals stream
* @author Khaled Al-Sham'aa <khaled@ar-php.org>
for ($i = 0; $i < $_temp; $i++ ) {
for ($i = $max - 1; $i >= 0; $i-- ) {
$prevChar = $chars[$i - 1];
if ($prevChar && mb_strpos($this->_vowel, $prevChar) !== false) {
$prevChar = $chars[$i - 2];
if ($prevChar && mb_strpos($this->_vowel, $prevChar) !== false) {
$prevChar = $chars[$i - 3];
if ($crntChar && mb_strpos($flip_arr, $crntChar) !== false) {
$crntChar = $ReversedChr[mb_strpos($flip_arr, $crntChar)];
if ($crntChar && !$Reversed
&& (mb_strpos($ReversedChr, $crntChar) !== false)
$crntChar = $flip_arr[mb_strpos($ReversedChr, $crntChar)];
if (ord($crntChar) < 128) {
if ($crntChar == 'ل' && isset ($chars[$i + 1])
&& (mb_strpos('آأإا', $chars[$i + 1]) !== false)
if ($crntChar && mb_strpos($this->_vowel, $crntChar) !== false) {
if (isset ($chars[$i + 1])
&& (mb_strpos($this->_nextLink, $chars[$i + 1]) !== false)
&& (mb_strpos($this->_prevLink, $prevChar) !== false)
$output .= '&#x' . $this->getGlyphs($crntChar, 1) . ';';
$output .= '&#x' . $this->getGlyphs($crntChar, 0) . ';';
if (($prevChar == 'لا' || $prevChar == 'لآ' || $prevChar == 'لأ'
|| $prevChar == 'لإ' || $prevChar == 'ل')
if (mb_strpos($this->_prevLink, $chars[$i - 2]) !== false) {
if (mb_strpos($this->_vowel, $chars[$i - 1])) {
$output .= $this->getGlyphs($crntChar, $form). ';';
$output .= $this->getGlyphs($prevChar. $crntChar, $form). ';';
if ($prevChar && mb_strpos($this->_prevLink, $prevChar) !== false) {
if ($nextChar && mb_strpos($this->_nextLink, $nextChar) !== false) {
$output .= '&#x' . $this->getGlyphs($crntChar, $form) . ';';
// from Arabic Presentation Forms-B, Range: FE70-FEFF,
// file "UFE70.pdf" (in reversed order)
// into Arabic Presentation Forms-A, Range: FB50-FDFF, file "UFB50.pdf"
// Example: $output = str_replace('ﺠﻟ', 'ﳉ', $output);
* Uses a regression-derived formula to roughly estimate the maximum number of
* characters that fit in one A4 page line for a given font size.
* @param integer $font Font size
* @return integer Maximum number of characters per line
* @author Khaled Al-Sham'aa <khaled@ar-php.org>
$x = 381.6 - 31.57 * $font + 1.182 * pow($font, 2) - 0.02052 *
pow($font, 3) + 0.0001342 * pow($font, 4);
* Calculate the number of lines a given Arabic text will occupy on an A4 page at a given font size
* @param string $str Arabic string you would like to split it into lines
* @param integer $font Font size
* @return integer Number of lines for a given Arabic string in A4 page size
* @author Khaled Al-Sham'aa <khaled@ar-php.org>
public function a4Lines($str, $font)
$str = str_replace(array("\r\n", "\n", "\r"), "\n", $str);
$w_count = count($words);
for ($i = 0; $i < $w_count; $i++ ) {
if ($chars + $w_len < $max_chars) {
$words_nl = explode("\n", $words[$i]);
$nl_num = count($words_nl) - 1;
for ($j = 1; $j < $nl_num; $j++ ) {
* Convert Arabic Windows-1256 charset string into glyph joining in UTF-8
* hexadecimal stream (takes care of the whole document, including English
* sections as well as numbers, arcs, etc.)
* @param string $str Arabic string in Windows-1256 charset
* @param integer $max_chars Max number of chars you can fit in one line
* @param boolean $hindo If true use Hindo digits else use Arabic digits
* @return string Arabic glyph joining in UTF-8 hexadecimal stream (takes
* care of the whole document, including English sections as well
* as numbers, arcs, etc.)
* @author Khaled Al-Sham'aa <khaled@ar-php.org>
public function utf8Glyphs($str, $max_chars = 50, $hindo = true)
$str = str_replace(array("\r\n", "\n", "\r"), " \n ", $str);
$w_count = count($words);
for ($i = 0; $i < $w_count; $i++ ) {
$pattern .= '[a-z\d\\/\@\#\$\%\^\&\*\(\)\_\~\"\'\[\]\{\}\;\,\|\-\.\:!]*';
$pattern .= '([\.\:\+\=\-\!،؟]?)$/i';
$words[$i] = mb_substr($words[$i], 1). $matches[1];
$words[$i] = $matches[2]. mb_substr($words[$i], 0, - 1);
$words[$i] = strrev($words[$i]);
} elseif ($en_index != - 1) {
$en_count = count($english);
for ($j = 0; $j < $en_count; $j++ ) {
$words[$en_index + $j] = $english[$en_count - 1 - $j];
$en_count = count($english);
for ($j = 0; $j < $en_count; $j++ ) {
$words[$en_index + $j] = $english[$en_count - 1 - $j];
// needs more work to fix lines that start with English words
foreach ($en_words as $key => $value) {
while (list ($from, $to) = array_pop($en_stack)) {
for ($i = $from; $i <= $to; $i++ ) {
$new_words[] = $words[$i];
for ($i = 0; $i < $w_count; $i++ ) {
if ($c_chars + $w_len < $max_chars) {
$words_nl = explode("\n", $words[$i]);
$nl_num = count($words_nl) - 1;
for ($j = 1; $j < $nl_num; $j++ ) {
$c_words = array($words_nl[$nl_num]);
$c_chars = mb_strlen($words_nl[$nl_num]) + 1;
$c_words = array($words[$i]);
$maxLine = count($lines);
for ($j = $maxLine - 1; $j >= 0; $j-- ) {
$output .= $lines[$j] . "\n";
$output = rtrim($output);
foreach ($nums as $k => $v) {
$p_nums[$k] = '/'. $v. '/ui';
foreach ($arNums as $k => $v) {
$p_arNums[$k] = '/([a-z-\d]+)'. $v. '/ui';
foreach ($nums as $k => $v) {
foreach ($arNums as $k => $v) {
$p_arNums[$k] = '/'. $v. '([a-z-\d]+)/ui';
foreach ($nums as $k => $v) {
* Decode all HTML entities (including numerical ones) to regular UTF-8 bytes.
* Double-escaped entities will only be decoded once
* ("&amp;lt;" becomes "&lt;", not "<").
* @param string $text The text to decode entities in.
* @param array $exclude An array of characters which should not be decoded.
* For example, array('<', '&', '"'). This affects
* both named and numerical entities.
// We store named entities in a table for quick processing.
// Get all named HTML entities.
// PHP gives us ISO-8859-1 data, we need UTF-8.
// Use a regexp to select all entities in one pass, to avoid decoding
// double-escaped entities twice.
//return preg_replace('/&(#x?)?([A-Za-z0-9]+);/e',
// '$this->decodeEntities2("$1", "$2", "$0", $newtable,
foreach ($pieces as $piece) {
$two = mb_substr($piece, $start, $end - $start);
$zero = '&'. $one. $two. ';';
* Helper function for decodeEntities
* @param string $prefix Prefix
* @param string $codepoint Codepoint
* @param string $original Original
* @param array &$table Store named entities in a table
* @param array &$exclude An array of characters which should not be decoded
$prefix, $codepoint, $original, &$table, &$exclude
if (isset ($table[$original])) {
return $table[$original];
// Hexadecimal numerical entity
// Encode codepoint as UTF-8 bytes
} elseif ($codepoint < 0x800) {
$str = chr(0xC0 | ($codepoint >> 6)) .
chr(0x80 | ($codepoint & 0x3F));
} elseif ($codepoint < 0x10000) {
$str = chr(0xE0 | ($codepoint >> 12)) .
chr(0x80 | (($codepoint >> 6) & 0x3F)) .
chr(0x80 | ($codepoint & 0x3F));
} elseif ($codepoint < 0x200000) {
$str = chr(0xF0 | ($codepoint >> 18)) .
chr(0x80 | (($codepoint >> 12) & 0x3F)) .
chr(0x80 | (($codepoint >> 6) & 0x3F)) .
chr(0x80 | ($codepoint & 0x3F));
// Check for excluded characters
|