Commit f6ad30af authored by mso's avatar mso

Version 0.6.4 - 2010-10-13

git-svn-id: svn+ssh://85.214.81.201/srv/svn/repos/idna_convert/trunk@3160 35e3bc58-21ef-11dd-9788-dfbd14258a26
parent 8c391ab8
......@@ -4,7 +4,7 @@
* *
* http://idnaconv.phlymail.de mailto:phlymail@phlylabs.de *
*******************************************************************************
* (c) 2004-2008 phlyLabs, Berlin *
* (c) 2004-2010 phlyLabs, Berlin *
* This file is encoded in UTF-8 *
*******************************************************************************
......@@ -34,7 +34,7 @@ arrays. The default format is UTF-8. For setting different encodings, you can
call the method setParams() - please see the inline documentation for details.
ACE strings (the Punycode form) are always 7bit ASCII strings.
ATTENTION: As of version 0.6.0 of this class it is written in the OOP style of PHP5.
ATTENTION: As of version 0.6.0 this class is written in the OOP style of PHP5.
Since PHP4 is no longer actively maintained, you should switch to PHP5 as fast as
possible.
We expect to see no compatibility issues with the upcoming PHP6, too.
......@@ -58,7 +58,7 @@ Examples
// Include the class
require_once('idna_convert.class.php');
// Instantiate it *
// Instantiate it
$IDN = new idna_convert();
// The input string; if it is not UTF-8 or UCS-4, it must be converted first
$input = utf8_encode('nörgler.com');
......@@ -73,7 +73,7 @@ echo $output; // This will read: xn--nrgler-wxa.com
// Include the class
require_once('idna_convert.class.php');
// Instantiate it (depending on the version you are using) with
// Instantiate it
$IDN = new idna_convert();
// The input string
$input = 'andre@xn--brse-5qa.xn--knrz-1ra.info';
......@@ -99,6 +99,22 @@ foreach (file('ucs4-domains.txt') as $line) {
}
4. We wish to convert a whole URI into the IDNA form, but leave the path or
query string component of it alone. Just using encode() would lead to mangled
paths or query strings. Here the public method encode_uri() comes into play:
// Include the class
require_once('idna_convert.class.php');
// Instantiate it
$IDN = new idna_convert();
// The input string, a whole URI in UTF-8 (!)
$input = 'http://nörgler:secret@nörgler.com/my_päth_is_not_ÄSCII/';
// Encode it to its Punycode representation
$output = $IDN->encode_uri($input);
// Output what we have now
echo $output; // http://nörgler:secret@xn--nrgler-wxa.com/my_päth_is_not_ÄSCII/
Transcode wrapper
-----------------
In case you have strings in encodings other than ISO-8859-1 and UTF-8, you might need to
......
<?php
$encoded = '';
$decoded = '';
$add = '';
header('Content-Type: text/html; charset=UTF-8');
$encoded = $decoded = $add = '';
header('Content-Type: text/html; charset=utf-8');
require_once('idna_convert.class.php');
$IDN = new idna_convert();
if (isset($_REQUEST['encode'])) {
......@@ -13,87 +11,88 @@ if (isset($_REQUEST['decode'])) {
$encoded = isset($_REQUEST['encoded']) ? stripslashes($_REQUEST['encoded']) : '';
$decoded = $IDN->decode($encoded);
}
$lang = 'en';
if (isset($_REQUEST['lang'])) {
if ('de' == $_REQUEST['lang'] || 'en' == $_REQUEST['lang']) $lang = $_REQUEST['lang'];
$add .= '<input type="hidden" name="lang" value="'.$_REQUEST['lang'].'" />'."\n";
} else {
$lang = 'en';
}
?>
<!DOCTYPE html public "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>phlyLabs Punycode Converter</title>
<meta name="author" content="phlyLabs">
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
<meta name="author" content="phlyLabs" />
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
<style type="text/css">
body { color:black;background:white;font-size:10pt;font-family:Verdana, Helvetica, Sans-Serif; }
body, form { margin:0px; }
/*<![CDATA[*/
body { color:black;background:white;font-size:10pt;font-family:Verdana,Helvetica,Sans-Serif; }
body, form { margin:0; }
form { display:inline; }
input { font-size:8pt;font-family:Verdana, Helvetica, Sans-Serif; }
#mitte { text-align:center;vertical-align:middle; }
#round { background-color:rgb(230, 230, 240);border:1px solid black;text-align:center;vertical-align:middle;padding:10px; }
.thead { font-size:9pt;font-weight:bold; }
#copy { font-size:8pt;color:rgb(60, 60, 80); }
input { font-size:8pt;font-family:Verdana,Helvetica,Sans-Serif; }
#round { width:730px;padding:10px;background-color:rgb(230,230,240);border:1px solid black;text-align:center;vertical-align:middle;margin:auto;margin-top:50px; }
th { font-size:9pt;font-weight:bold; }
#copy { font-size:8pt;color:rgb(60,60,80); }
#subhead { font-size:8pt; }
#bla { font-size:8pt;text-align:left; }
h5 {margin:0;font-size:11pt;font-weight:bold;}
/*]]>*/
</style>
</head>
<body>
<table width="768" border="0" cellpadding="50" cellspacing="0">
<tr>
<td id="mitte">
<div id="round">
<strong>phlyLabs` pure PHP IDNA Converter</strong><br />
<span id="subhead">
See <a href="http://faqs.org/rfcs/rfc3490.html" title="IDNA" target="_blank">RFC3490</a>,
<a href="http://faqs.org/rfcs/rfc3491.html" title="Nameprep, a Stringprep profile" target="_blank">RFC3491</a>,
<a href="http://faqs.org/rfcs/rfc3492.html" title="Punycode" target="_blank">RFC3492</a> and
<a href="http://faqs.org/rfcs/rfc3454.html" title="Stringprep" target="_blank">RFC3454</a><br />
</span>
<br />
<div id="bla"><?php if ($lang == 'de') { ?>
Dieser Konverter erlaubt die Übersetzung von Domainnamen zwischen der Punycode- und der
Unicode-Schreibweise.<br />
Geben Sie einfach den Domainnamen im entsprechend bezeichneten Feld ein und klicken Sie dann auf den darunter
liegenden Button. Sie können einfache Domainnamen, komplette URLs (wie http://jürgen-müller.de)
oder Emailadressen eingeben.<br />
<br />
Stellen Sie aber sicher, dass Ihr Browser den Zeichensatz <strong>UTF-8</strong> unterstützt.<br />
<br />
Wenn Sie Interesse an der zugrundeliegenden PHP-Klasse haben, können Sie diese
<a href="http://phlymail.com/de/downloads/idna/download/">hier herunterladen</a>.<br />
<br />
Diese Klasse wird ohne Garantie ihrer Funktionstüchtigkeit bereit gestellt. Nutzung auf eigene Gefahr.<br />
Um sicher zu stellen, dass eine Zeichenkette korrekt umgewandelt wurde, sollten Sie diese immer zurückwandeln
und das Ergebnis mit Ihrer ursprünglichen Eingabe vergleichen.<br />
<br />
Fehler und Probleme können Sie gern an <a href="mailto:team@phlymail.com">team@phlymail.com</a> senden.<br />
<?php } else { ?>
This converter allows you to transfer domain names between the encoded (Punycode) notation
and the decoded (UTF-8) notation.<br />
Just enter the domain name in the respective field and click on the button right below it to have
it converted. Please note, that you might even enter complete domain names (like j&#xFC;rgen-m&#xFC;ller.de)
or a email addresses.<br />
<br />
Make sure, that your browser is capable of the <strong>UTF-8</strong> character encoding.<br />
<br />
For those of you interested in the PHP source of the underlying class, you might
<a href="http://phlymail.com/en/downloads/idna/download/">download it here</a>.<br />
<br />
Please be aware, that this class is provided as is and without any liability. Use at your own risk.<br />
To ensure, that a certain string has been converted correctly, you should convert it both ways and compare the
results.<br />
<br />
Please feel free to report bugs and problems to: <a href="mailto:team@phlymail.com">team@phlymail.com</a>.<br />
<?php } ?>
<br />
</div>
<table border="0" cellpadding="2" cellspacing="2" align="center">
<div id="round">
<h5>phlyLabs' pure PHP IDNA Converter</h5><br />
<span id="subhead">
See <a href="http://faqs.org/rfcs/rfc3490.html" title="IDNA" target="_blank">RFC3490</a>,
<a href="http://faqs.org/rfcs/rfc3491.html" title="Nameprep, a Stringprep profile" target="_blank">RFC3491</a>,
<a href="http://faqs.org/rfcs/rfc3492.html" title="Punycode" target="_blank">RFC3492</a> and
<a href="http://faqs.org/rfcs/rfc3454.html" title="Stringprep" target="_blank">RFC3454</a><br />
</span>
<br />
<div id="bla"><?php if ($lang == 'de') { ?>
Dieser Konverter erlaubt die Übersetzung von Domainnamen zwischen der Punycode- und der
Unicode-Schreibweise.<br />
Geben Sie einfach den Domainnamen im entsprechend bezeichneten Feld ein und klicken Sie dann auf den darunter
liegenden Button. Sie können einfache Domainnamen, komplette URLs (wie http://jürgen-müller.de)
oder Emailadressen eingeben.<br />
<br />
Stellen Sie aber sicher, dass Ihr Browser den Zeichensatz <strong>UTF-8</strong> unterstützt.<br />
<br />
Wenn Sie Interesse an der zugrundeliegenden PHP-Klasse haben, können Sie diese
<a href="http://phlymail.com/de/downloads/idna/download/">hier herunterladen</a>.<br />
<br />
Diese Klasse wird ohne Garantie ihrer Funktionstüchtigkeit bereit gestellt. Nutzung auf eigene Gefahr.<br />
Um sicher zu stellen, dass eine Zeichenkette korrekt umgewandelt wurde, sollten Sie diese immer zurückwandeln
und das Ergebnis mit Ihrer ursprünglichen Eingabe vergleichen.<br />
<br />
Fehler und Probleme können Sie gern an <a href="mailto:team@phlymail.de">team@phlymail.de</a> senden.<br />
<?php } else { ?>
This converter allows you to transfer domain names between the encoded (Punycode) notation
and the decoded (UTF-8) notation.<br />
Just enter the domain name in the respective field and click on the button right below it to have
it converted. Please note, that you might even enter complete domain names (like j&#xFC;rgen-m&#xFC;ller.de)
or a email addresses.<br />
<br />
Make sure, that your browser is capable of the <strong>UTF-8</strong> character encoding.<br />
<br />
For those of you interested in the PHP source of the underlying class, you might
<a href="http://phlymail.com/en/downloads/idna/download/">download it here</a>.<br />
<br />
Please be aware, that this class is provided as is and without any liability. Use at your own risk.<br />
To ensure, that a certain string has been converted correctly, you should convert it both ways and compare the
results.<br />
<br />
Please feel free to report bugs and problems to: <a href="mailto:team@phlymail.com">team@phlymail.com</a>.<br />
<?php } ?>
<br />
</div>
<table border="0" cellpadding="2" cellspacing="2" align="center">
<thead>
<tr>
<td class="thead" align="left">Original (Unicode)</td>
<td class="thead" align="right">Punycode (ACE)</td>
<th align="left">Original (Unicode)</th>
<th align="right">Punycode (ACE)</th>
</tr>
</thead>
<tbody>
<tr>
<td align="right">
<form action="<?php echo $_SERVER['PHP_SELF']; ?>" method="get">
......@@ -108,11 +107,10 @@ input { font-size:8pt;font-family:Verdana, Helvetica, Sans-Serif; }
</form>
</td>
</tr>
</table><br />
<span id="copy">Version used: 0.6.2; (c) <a href="http://phlylabs.de">phlyLabs</a> 2004-2009</span>
</tbody>
</table>
<br />
<span id="copy">Version used: 0.6.4; &copy; 2004-2010 phlyLabs Berlin; part of <a href="http://phlymail.com/">phlyMail</a></span>
</div>
</td>
</tr>
</table>
</body>
</html>
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -2,7 +2,7 @@
/**
* UCTC - The Unicode Transcoder
*
* Converts between various flavours of Unicode representations like UCS-4 or UTF8
* Converts between various flavours of Unicode representations like UCS-4 or UTF-8
* Supported schemes:
* - UCS-4 Little Endian / Big Endian / Array (partially)
* - UTF-16 Little Endian / Big Endian (not yet)
......@@ -10,9 +10,10 @@
* - UTF-7
* - UTF-7 IMAP (modified UTF-7)
*
* @author Matthias Sommerfeld <mso@phlylabs.de>
* @version 0.0.5
* @package phlyMail
* @package phlyMail Nahariya 4.0+ Default branch
* @author Matthias Sommerfeld <mso@phlyLabs.de>
* @copyright 2003-2009 phlyLabs Berlin, http://phlylabs.de
* @version 0.0.6 2009-05-10
*/
class uctc {
private static $mechs = array('ucs4', /*'ucs4le', 'ucs4be', */'ucs4array', /*'utf16', 'utf16le', 'utf16be', */'utf8', 'utf7', 'utf7imap');
......@@ -44,12 +45,12 @@ class uctc {
}
/**
* This converts a UTF-8 encoded string to its UCS-4 representation
*
* @param string $input The UTF-8 string to convert
* @return array Array of 32bit values representing each codepoint
* @access private
*/
* This converts a UTF-8 encoded string to its UCS-4 representation
*
* @param string $input The UTF-8 string to convert
* @return array Array of 32bit values representing each codepoint
* @access private
*/
private static function utf8_ucs4array($input)
{
$output = array();
......@@ -136,10 +137,10 @@ class uctc {
}
/**
* Convert UCS-4 string into UTF-8 string
* See utf8_ucs4array() for details
* @access private
*/
* Convert UCS-4 string into UTF-8 string
* See utf8_ucs4array() for details
* @access private
*/
private static function ucs4array_utf8($input)
{
$output = '';
......@@ -227,30 +228,32 @@ class uctc {
$output = '';
$mode = 'd';
$b64 = '';
foreach ($input as $v) {
$is_direct = (0x20 <= $v && $v <= 0x7e && $v != ord($sc));
while (true) {
$v = (!empty($input)) ? array_shift($input) : false;
$is_direct = (false !== $v) ? (0x20 <= $v && $v <= 0x7e && $v != ord($sc)) : true;
if ($mode == 'b') {
if ($is_direct) {
if ($b64 == chr(0).$sc) {
$output .= $sc.'-';
$b64 = '';
} else {
while (strlen($b64) % 3) $b64 .= chr(0);
$output .= $sc.base64_encode($b64).'-';
} elseif ($b64) {
$output .= $sc.str_replace('=', '', base64_encode($b64)).'-';
$b64 = '';
}
$mode = 'd';
} else {
$b64 .= (chr(($v >> 8) & 255). chr($v & 255));
} elseif (false !== $v) {
$b64 .= chr(($v >> 8) & 255). chr($v & 255);
}
}
if ($mode == 'd') {
if ($mode == 'd' && false !== $v) {
if ($is_direct) {
$output .= chr($v);
} else {
$b64 = (chr(($v >> 8) & 255). chr($v & 255));
$b64 = chr(($v >> 8) & 255). chr($v & 255);
$mode = 'b';
}
}
if (false === $v && $b64 == '') break;
}
return $output;
}
......@@ -293,6 +296,5 @@ class uctc {
}
return $output;
}
}
?>
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment