Source for file Transliteration.php
Documentation is available at Transliteration.php
* ----------------------------------------------------------------------
* Copyright (c) 2006-2016 Khaled Al-Sham'aa.
* ----------------------------------------------------------------------
* This program is open source product; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License (LGPL)
* as published by the Free Software Foundation; either version 3
* of the License, or (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/lgpl.txt>.
* ----------------------------------------------------------------------
* Class Name: English-Arabic Transliteration
* Filename: Transliteration.php
* Original Author(s): Khaled Al-Sham'aa <khaled@ar-php.org>
* Purpose: Transliterate English words into Arabic by rendering them
* in the orthography of the Arabic language and vice versa
* ----------------------------------------------------------------------
* English-Arabic Transliteration
* PHP class that transliterates English words into Arabic by rendering them in the
* orthography of the Arabic language and vice versa.
* Out of vocabulary (OOV) words are a common source of errors in cross language
* information retrieval. Bilingual dictionaries are often limited in their coverage
* of named entities, numbers, technical terms and acronyms. There is a need to
* generate translations for these "on-the-fly" or at query time.
* A significant proportion of OOV words are named entities and technical terms.
* Typical analyses find around 50% of OOV words to be named entities. Yet these
* can be the most important words in the queries. Cross language retrieval
* performance (average precision) was reduced by more than 50% when named entities in the
* queries were not translated.
* When the query language and the document language share the same alphabet it may
* be sufficient to use the OOV word as its own translation. However, when the two
* languages have different alphabets, the query term must somehow be rendered in
* the orthography of the other language. The process of converting a word from one
* orthography into another is called transliteration.
* Foreign words often occur in Arabic text as transliteration. This is the case for
* many categories of foreign words, not just proper names but also technical terms
* such as caviar, telephone and internet.
* include('./I18N/Arabic.php');
* $obj = new I18N_Arabic('Transliteration');
* $ar_word_1 = $obj->en2ar($en_word_1);
* $en_word_2 = $obj->ar2en($ar_word_2);
* @author Khaled Al-Sham'aa <khaled@ar-php.org>
* @copyright 2006-2016 Khaled Al-Sham'aa
* @license LGPL <http://www.gnu.org/licenses/lgpl.txt>
* @link http://www.ar-php.org
* This PHP class transliterates English words into Arabic
* @author Khaled Al-Sham'aa <khaled@ar-php.org>
* @copyright 2006-2016 Khaled Al-Sham'aa
* @license LGPL <http://www.gnu.org/licenses/lgpl.txt>
* @link http://www.ar-php.org
// Clean-up regex patterns and their positional replacements. Read pairwise,
// they collapse a run of apostrophes into a single one, drop an apostrophe
// that directly follows a hyphen or a space, and turn "X#" into "X'X".
// $_arFineReplacements is referenced from ar2en(); presumably both arrays feed
// a final preg_replace pass there (TODO confirm against the full method body).
private static $_arFinePatterns = array("/'+/u", "/([\- ])'/u", '/(.)#/u');
private static $_arFineReplacements = array("'", '\\1', "\\1'\\1");
// English-to-Arabic tables. The constructor fills each Search/Replace pair in
// parallel from the XML <pair> elements under preg_replace[@function='en2ar']
// and str_replace[@function='en2ar'].
private static $_en2arPregSearch = array();
private static $_en2arPregReplace = array();
private static $_en2arStrSearch = array();
private static $_en2arStrReplace = array();
// Arabic-to-English tables, filled the same way from the XML nodes whose
// function attribute is 'ar2en'.
private static $_ar2enPregSearch = array();
private static $_ar2enPregReplace = array();
private static $_ar2enStrSearch = array();
private static $_ar2enStrReplace = array();
// Pairs loaded from str_replace[@function='diaritical']; ar2en() references
// them in its UNGEGN+, RJGC and SES branches.
private static $_diariticalSearch = array();
private static $_diariticalReplace = array();
// Per-standard tables loaded from str_replace[@function='ISO233'],
// [@function='RJGC'] and [@function='SES'] respectively.
// NOTE(review): every table here is static while the constructor appends with
// array_push, so constructing the class more than once appears to append the
// same pairs again - confirm whether a guard exists in the lines not shown.
private static $_iso233Search = array();
private static $_iso233Replace = array();
private static $_rjgcSearch = array();
private static $_rjgcReplace = array();
private static $_sesSearch = array();
private static $_sesReplace = array();
* Loads initial values
public function __construct()
foreach ($xml->xpath("//preg_replace[@function='ar2en']/pair") as $pair) {
array_push(self::$_ar2enPregSearch, (string) $pair->search);
array_push(self::$_ar2enPregReplace, (string) $pair->replace);
$xml->xpath("//str_replace[@function='diaritical']/pair") as $pair
array_push(self::$_diariticalSearch, (string) $pair->search);
array_push(self::$_diariticalReplace, (string) $pair->replace);
foreach ($xml->xpath("//str_replace[@function='ISO233']/pair") as $pair) {
array_push(self::$_iso233Search, (string) $pair->search);
array_push(self::$_iso233Replace, (string) $pair->replace);
foreach ($xml->xpath("//str_replace[@function='RJGC']/pair") as $pair) {
array_push(self::$_rjgcSearch, (string) $pair->search);
array_push(self::$_rjgcReplace, (string) $pair->replace);
foreach ($xml->xpath("//str_replace[@function='SES']/pair") as $pair) {
array_push(self::$_sesSearch, (string) $pair->search);
array_push(self::$_sesReplace, (string) $pair->replace);
foreach ($xml->xpath("//str_replace[@function='ar2en']/pair") as $pair) {
array_push(self::$_ar2enStrSearch, (string) $pair->search);
array_push(self::$_ar2enStrReplace, (string) $pair->replace);
foreach ($xml->xpath("//preg_replace[@function='en2ar']/pair") as $pair) {
array_push(self::$_en2arPregSearch, (string) $pair->search);
array_push(self::$_en2arPregReplace, (string) $pair->replace);
foreach ($xml->xpath("//str_replace[@function='en2ar']/pair") as $pair) {
array_push(self::$_en2arStrSearch, (string) $pair->search);
array_push(self::$_en2arStrReplace, (string) $pair->replace);
* Transliterate an English string into Arabic by rendering it in the
* orthography of the Arabic language
* @param string $string English string you want to transliterate
* @param string $locale Locale information (e.g. 'en_GB' or 'de_DE')
* @return String Out of vocabulary English string in Arabic characters
* @author Khaled Al-Sham'aa <khaled@ar-php.org>
public static function en2ar($string, $locale= 'en_US')
setlocale(LC_ALL, $locale);
$string = iconv("UTF-8", "ASCII//TRANSLIT", $string);
foreach ($words as $word) {
self::$_en2arPregReplace, $word
* Transliterate an Arabic string into English by rendering it in the
* orthography of the English language
* @param string $string Arabic string you want to transliterate
* @param string $standard Transliteration standard, default is UNGEGN
* and possible values are [UNGEGN, UNGEGN+, RJGC, SES, ISO233]
* @return String Out of vocabulary Arabic string in English characters
* @author Khaled Al-Sham'aa <khaled@ar-php.org>
public static function ar2en($string, $standard= 'UNGEGN')
//$string = str_replace('ة ال', 'tul', $string);
$words = explode(' ', $string);
for ($i= 0; $i< count($words)- 1; $i++ ) {
foreach ($words as $word) {
if ($standard == 'UNGEGN+') {
self::$_diariticalSearch,
self::$_diariticalReplace,
} else if ($standard == 'RJGC') {
self::$_diariticalSearch,
self::$_diariticalReplace,
} else if ($standard == 'SES') {
self::$_diariticalSearch,
self::$_diariticalReplace,
} else if ($standard == 'ISO233') {
self::$_ar2enPregReplace,
self::$_arFineReplacements,
if (preg_match('/[a-z]/', mb_substr($temp, 0, 1))) {
$temp2 = substr($temp, 0, $pos);
$temp2 .= substr($temp, $pos+ 2);
* Render numbers in given string using HTML entities that will show them as
* Arabic digits (i.e. 1, 2, 3, etc.) whatever browser language settings are
* (if browser supports UTF-8 character set).
* @param string $string String includes some digits here or there
* @return String Original string after replacing digits by HTML entities that
* will show given number using Arabic digits (1, 2, 3, etc.)
* @author Khaled Al-Sham'aa <khaled@ar-php.org>
public static function enNum($string)
foreach ($digits as $digit) {
$html .= preg_match('/\d/', $digit) ? "$digit;" : $digit;
* Render numbers in given string using HTML entities that will show them as
* Indian digits (i.e. ١, ٢, ٣, etc.) whatever browser language settings are
* (if browser supports UTF-8 character set).
* @param string $string String includes some digits here or there
* @return String Original string after replacing digits by HTML entities that
* will show given number using Indian digits (١, ٢, ٣, etc.)
* @author Khaled Al-Sham'aa <khaled@ar-php.org>
public static function arNum($string)
foreach ($digits as $digit) {
$html .= preg_match('/\d/', $digit) ? "f$digit;" : $digit;
|