Commit 4222d158 authored by mso's avatar mso

Version 0.5.2 - 2007-09-27

git-svn-id: svn+ssh://85.214.81.201/srv/svn/repos/idna_convert/trunk@3156 35e3bc58-21ef-11dd-9788-dfbd14258a26
parent 248e1211
......@@ -42,11 +42,12 @@ with the upcoming PHP6, too.
Files
-----
idna_convert.class.php - The actual class
idna_convert.create.npdata.php - Useful for (re)creating the NPData file
npdata.ser - Serialized data for NamePrep
example.php - An example web page for converting
transcode_wrapper.php - Convert various encodings, see below
uctc.php - phlyLabs' Unicode Transcoder, see below
ReadMe.txt - This file
LICENCE - The LGPL licence file
......@@ -57,11 +58,10 @@ itself!
Examples
--------
1. Say we wish to encode the domain name nörgler.com:
// Include the class
include_once('idna_convert.class.php');
require_once('idna_convert.class.php');
// Instantiate it *
$IDN = new idna_convert();
// The input string, if input is not UTF-8 or UCS-4, it must be converted before
......@@ -76,7 +76,7 @@ echo $output; // This will read: xn--nrgler-wxa.com
the domain name reads originally
// Include the class
include_once('idna_convert.class.php');
require_once('idna_convert.class.php');
// Instantiate it (depending on the version you are using) with
$IDN = new idna_convert();
// The input string
......@@ -93,7 +93,7 @@ echo utf8_decode($output); // This will read: andre@börse.knörz.info
format to be used
// Include the class
include_once('idna_convert.class.php');
require_once('idna_convert.class.php');
// Instantiate it
$IDN = new idna_convert();
// Iterate through the input file line by line
......@@ -105,16 +105,55 @@ foreach (file('ucs4-domains.txt') as $line) {
NPData
------
Should you need to recreate the npdata.ser file, which holds all necessary translation
tables in a serialized format, you can run the file idna_convert.create.npdata.php, which
creates the file for you and stores it in the same folder, where it is placed.
Should you need to make changes to the tables, you can do so, but beware of the consequences.
Transcode wrapper
-----------------
In case you have strings in an encoding other than ISO-8859-1 or UTF-8, you might need to
translate these strings to UTF-8 before feeding them to the IDNA converter.
PHP's built in functions utf8_encode() and utf8_decode() can only deal with ISO-8859-1.
Use the file transcode_wrapper.php for the conversion. It requires either iconv, libiconv
or mbstring installed together with one of the relevant PHP extensions.
The functions you will find useful are
encode_utf8() as a replacement for utf8_encode() and
decode_utf8() as a replacement for utf8_decode().
Example usage:
<?php
require_once('idna_convert.class.php');
require_once('transcode_wrapper.php');
$mystring = '<something in e.g. ISO-8859-15>';
$mystring = encode_utf8($mystring, 'ISO-8859-15');
$IDN = new idna_convert();
echo $IDN->encode($mystring);
?>
UCTC - Unicode Transcoder
-------------------------
Another class you might find useful when dealing with one or more of the Unicode encoding
flavours. The class is static and requires PHP5. It can transcode between the following formats:
- UCS-4 string / array
- UTF-8
- UTF-7
- UTF-7 IMAP (modified UTF-7)
All encodings expect / return a string in the given format, with one major exception:
UCS-4 array is just an array, where each value represents one codepoint in the string, i.e.
every value is a 32bit integer value.
Example usage:
<?php
require_once('uctc.php');
$mystring = 'nörgler.com';
echo uctc::convert($mystring, 'utf8', 'utf7imap');
?>
Contact us
----------
In case of errors, bugs, questions, wishes, please don't hesitate to contact us
under the email address above.
......
<?php
/**
* transcode wrapper functions
* @package IDNA Convert
* @subpackage charset transcoding
* @author Matthias Sommerfeld, <mso@phlylabs.de>
* @version 0.1.0
*/
/**
 * Convert a string from any of various encodings to UTF-8
 *
 * UTF-8 input is passed through untouched, ISO-8859-1 and Windows-1252 are
 * handled with utf8_encode(), everything else is handed to the first of
 * mb_convert_encoding(), iconv() or libiconv() that is available and succeeds.
 *
 * NOTE: utf8_encode() is deprecated as of PHP 8.2.
 * NOTE: on PHP 8+ mb_convert_encoding() throws a ValueError for unknown
 * encoding names; the "@" operator does not suppress that.
 *
 * @param string String to encode
 *[@param string Encoding; Default: ISO-8859-1]
 *[@param bool Safe Mode: if set to TRUE, the original string is returned on errors]
 * @return string The encoded string or false on failure
 * @since 0.0.1
 */
function encode_utf8($string = '', $encoding = 'iso-8859-1', $safe_mode = false)
{
    // Value handed back when no converter is available or all of them fail
    $safe = ($safe_mode) ? $string : false;
    // Same fallback as decode_utf8(): an empty encoding means the default
    if (!$encoding) $encoding = 'ISO-8859-1';
    $enc = strtoupper($encoding);
    if ($enc === 'UTF-8' || $enc === 'UTF8') {
        return $string;
    } elseif ($enc === 'ISO-8859-1') {
        return utf8_encode($string);
    } elseif ($enc === 'WINDOWS-1252') {
        return utf8_encode(map_w1252_iso8859_1($string));
    } elseif ($enc === 'UNICODE-1-1-UTF-7') {
        // Alias; the converters below are given the plain UTF-7 name
        $enc = 'UTF-7';
    }
    // Compare against false explicitly: '' and '0' are valid conversion
    // results and must not be mistaken for a failure.
    if (function_exists('mb_convert_encoding')) {
        $conv = @mb_convert_encoding($string, 'UTF-8', $enc);
        if ($conv !== false) return $conv;
    }
    if (function_exists('iconv')) {
        $conv = @iconv($enc, 'UTF-8', $string);
        if ($conv !== false) return $conv;
    }
    if (function_exists('libiconv')) {
        $conv = @libiconv($enc, 'UTF-8', $string);
        if ($conv !== false) return $conv;
    }
    return $safe;
}
/**
 * Convert a string from UTF-8 to any of various encodings
 *
 * A UTF-8 target is passed through untouched, ISO-8859-1 and Windows-1252 are
 * handled with utf8_decode(), everything else is handed to the first of
 * mb_convert_encoding(), iconv() or libiconv() that is available and succeeds.
 *
 * NOTE: utf8_decode() is deprecated as of PHP 8.2.
 * NOTE: on PHP 8+ mb_convert_encoding() throws a ValueError for unknown
 * encoding names; the "@" operator does not suppress that.
 *
 * @param string String to decode
 *[@param string Encoding; Default: ISO-8859-1]
 *[@param bool Safe Mode: if set to TRUE, the original string is returned on errors]
 * @return string The decoded string or false on failure
 * @since 0.0.1
 */
function decode_utf8($string = '', $encoding = 'iso-8859-1', $safe_mode = false)
{
    // Value handed back when no converter is available or all of them fail
    $safe = ($safe_mode) ? $string : false;
    if (!$encoding) $encoding = 'ISO-8859-1';
    $enc = strtoupper($encoding);
    if ($enc === 'UTF-8' || $enc === 'UTF8') {
        return $string;
    } elseif ($enc === 'ISO-8859-1') {
        return utf8_decode($string);
    } elseif ($enc === 'WINDOWS-1252') {
        return map_iso8859_1_w1252(utf8_decode($string));
    } elseif ($enc === 'UNICODE-1-1-UTF-7') {
        // Alias; the converters below are given the plain UTF-7 name
        $enc = 'UTF-7';
    }
    // Compare against false explicitly: '' and '0' are valid conversion
    // results and must not be mistaken for a failure.
    if (function_exists('mb_convert_encoding')) {
        $conv = @mb_convert_encoding($string, $enc, 'UTF-8');
        if ($conv !== false) return $conv;
    }
    if (function_exists('iconv')) {
        $conv = @iconv('UTF-8', $enc, $string);
        if ($conv !== false) return $conv;
    }
    if (function_exists('libiconv')) {
        $conv = @libiconv('UTF-8', $enc, $string);
        if ($conv !== false) return $conv;
    }
    return $safe;
}
/**
 * Special treatment for our guys in Redmond
 * Windows-1252 is basically ISO-8859-1 -- with some exceptions, which get accounted for here
 *
 * Seven byte values are remapped (129, 132, 142, 148, 153, 154, 225 become
 * 252, 228, 196, 246, 214, 220, 223); every other byte is copied unchanged.
 *
 * NOTE(review): the source positions appear to match the German umlauts and
 * sharp s of DOS code page 437/850 rather than Windows-1252 (where e.g. byte
 * 225 is already "a acute" as in ISO-8859-1) -- confirm the intended source
 * charset before relying on this for genuine Windows-1252 input.
 *
 * @param string Your input in Win1252
 * @return string The resulting ISO-8859-1 string
 * @since 3.0.8
 */
function map_w1252_iso8859_1($string = '')
{
    if ($string == '') return '';
    // Three-argument strtr() translates byte by byte: the n-th byte of the
    // first table is replaced by the n-th byte of the second one.
    return strtr(
        $string,
        "\x81\x84\x8E\x94\x99\x9A\xE1",
        "\xFC\xE4\xC4\xF6\xD6\xDC\xDF"
    );
}
/**
 * Special treatment for our guys in Redmond
 * Windows-1252 is basically ISO-8859-1 -- with some exceptions, which get accounted for here
 *
 * Seven byte values are remapped (196, 214, 220, 223, 228, 246, 252 become
 * 142, 153, 154, 225, 132, 148, 129); every other byte is copied unchanged.
 *
 * NOTE(review): the target positions appear to match the German umlauts and
 * sharp s of DOS code page 437/850 rather than Windows-1252 (where these
 * characters sit at the same positions as in ISO-8859-1) -- confirm the
 * intended target charset before relying on this for genuine Windows-1252 output.
 *
 * @param string Your input in ISO-8859-1
 * @return string The resulting Win1252 string
 * @since 3.0.8
 */
function map_iso8859_1_w1252($string = '')
{
    if ($string == '') return '';
    // Three-argument strtr() translates byte by byte: the n-th byte of the
    // first table is replaced by the n-th byte of the second one.
    return strtr(
        $string,
        "\xC4\xD6\xDC\xDF\xE4\xF6\xFC",
        "\x8E\x99\x9A\xE1\x84\x94\x81"
    );
}
?>
\ No newline at end of file
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment