ISO / UTF-8 Conversion
UTF-8/ISO-8859-1 Conversion
|
|
|
[ home ]
-
[ search ]
-
[ sitemap ]
UTF-8 Conversion
#!/usr/bin/perl -w
use strict;

# Unbuffered STDOUT so the result appears immediately.
$| = 1;

# Usage:
# ./utf8.pl utf Zürich    (STRING is UTF-8, printed as ISO-8859-1)
# ./utf8.pl iso Zürich    (STRING is ISO-8859-1, printed as UTF-8)

# Source and target encoding for each supported mode; the mode names the
# encoding the input string is converted FROM.
my %conversion_for = (
    iso => { from => 'ISO-8859-1', to => 'UTF-8' },
    utf => { from => 'UTF-8',      to => 'ISO-8859-1' },
);

my ( $mode, $input ) = @ARGV;

# Reject a missing/unknown mode or a missing string up front instead of
# emitting "uninitialized value" warnings and silently printing nothing.
unless ( defined $mode && exists $conversion_for{$mode} && defined $input ) {
    die "Usage: $0 (utf|iso) STRING\n";
}

# The &-call is deliberate: decode() is declared with a prototype further
# down in the file, and a plain call placed before that declaration would
# warn "called too early to check prototype" under -w.
print &decode(
    string => $input,
    from   => $conversion_for{$mode}{from},
    to     => $conversion_for{$mode}{to},
), "\n";
# Convert a byte string from one character encoding to another by running
# /usr/bin/iconv on a temporary file.
#
# Named arguments:
#   string - bytes to convert (a missing value is treated as the empty string)
#   from   - source encoding name, passed to "iconv -f"
#   to     - target encoding name, passed to "iconv -t"
#
# Returns whatever iconv wrote to its standard output. Returns the empty
# string (after a warning) when the temporary file cannot be written or
# iconv cannot be started.
sub decode (%) {
    my %args   = @_;
    my $string = defined $args{'string'} ? $args{'string'} : '';

    # File::Temp (core) picks an unpredictable name, creates the file
    # exclusively, and removes it again once $tmp goes out of scope, so no
    # /tmp/utf8-PID.txt files are left behind or can be pre-planted.
    require File::Temp;
    my $tmp = eval {
        File::Temp->new(
            TEMPLATE => 'utf8-XXXXXX',
            SUFFIX   => '.txt',
            TMPDIR   => 1,
        );
    };
    unless ( defined $tmp ) {
        warn "cannot create temporary file: $@";
        return '';
    }

    # Write the raw bytes untouched; iconv does the only conversion.
    binmode $tmp;
    unless ( print {$tmp} $string ) {
        warn "cannot write $tmp: $!\n";
        return '';
    }
    unless ( close $tmp ) {
        warn "cannot close $tmp: $!\n";
        return '';
    }

    # List-form pipe open runs iconv directly without a shell, so the
    # caller-supplied encoding names cannot inject shell commands.
    my $iconv;
    unless (
        open( $iconv, '-|', '/usr/bin/iconv',
            '-f', $args{'from'}, '-t', $args{'to'}, $tmp->filename )
      )
    {
        warn "cannot run /usr/bin/iconv: $!\n";
        return '';
    }
    binmode $iconv;
    my $decoded = do { local $/; <$iconv> };

    # A non-zero iconv exit status is not treated as fatal (the backtick
    # version did not check it either); whatever was produced is returned.
    close $iconv;

    return defined $decoded ? $decoded : '';
}
CPAN'd Style UTF Conversion
CPAN Module: Unicode::String
#!/usr/bin/perl
use strict;
use warnings;
use Unicode::String qw(latin1 utf8);

# Usage: ./iso.pl (utf8|iso) STRING
my ( $mode, $input ) = @ARGV;

# Without a recognised mode no Unicode::String object is built and the
# method calls below would die with "Can't call method ... on an undefined
# value", so stop early with a usage message instead.
unless ( defined $mode && ( $mode eq 'iso' || $mode eq 'utf8' ) && defined $input ) {
    die "Usage: $0 (utf8|iso) STRING\n";
}

# Mode 'iso' parses STRING with utf8(), mode 'utf8' parses it with latin1().
# NOTE(review): presumably the mode names the wanted target encoding rather
# than the encoding of the input - confirm against the intended usage.
my $in;
$in = utf8($input)   if $mode eq 'iso';
$in = latin1($input) if $mode eq 'utf8';

# Print the string in both encodings.
print "\nutf8 is: " . $in->utf8 . "\n";
print "\niso is: " . $in->latin1 . "\n";
Examples
./iso.pl utf8 "Zürich"
./iso.pl iso "Zürich"
Another Example
If you receive CGI data in UTF-8 format
# It is better to store the cookie values in 8-bit ISO format.
use Unicode::String qw(latin1 utf8);

# scalar() forces param() into scalar context: inside an argument list it
# would otherwise be called in list context and could hand several values
# to utf8() for a multi-valued parameter.
my $lastname_iso = ( utf8( scalar $q->param('lastname') ) )->latin1;
my $message_iso  = ( utf8( scalar $q->param('message') ) )->latin1;
Example script: Fault-tolerant character encoding
The following script produces correctly ISO-8859-1 (Latin-1) encoded text even if the input was already Latin-1 encoded instead of the expected UTF-8.
#!/usr/bin/perl -w
use strict;
use warnings;
use My::SMS;
use Unicode::String qw(latin1 utf8);

# Usage: script NUMBER TEXT
my $sms = My::SMS->new( debug => 0 );

# Fall back to the placeholder only when no text (or an empty text) was
# given; a plain `||` would also replace the legitimate message "0".
my $text = ( defined $ARGV[1] && length $ARGV[1] ) ? $ARGV[1] : '*no text*';

# if this host was UTF-8 encoded:
my $text_iso  = ( utf8($text) )->latin1;
my $text_utf8 = ( latin1($text_iso) )->utf8;    # reverse check

# If converting back does not reproduce the input, the input was not the
# UTF-8 form of a Latin-1 string; it is then passed on unchanged.
if ( $text ne $text_utf8 ) {
    print "Unequal reverse check! It seems your input data \"$text\" is ",
        "ISO encoded already, so you don't need the latin1 encoding stuff here!\n";

    # I'm going to fix this
    $text_iso = $text;
}

# Hand the recipient number (first argument) and the Latin-1 text to
# My::SMS. NOTE(review): the meaning of originator/flash is defined by
# My::SMS, which is not visible here.
$sms->build(
    nr         => $ARGV[0],
    text       => $text_iso,
    originator => '87654321',
    flash      => 1
);
$sms->send( quiet => 0 );
Inline Function
# Usage:
$foo = &toISO($foo);

# toISO: return the Latin-1 form of a string that is expected to be UTF-8.
#
# The input is converted UTF-8 -> Latin-1 and then back again. When the
# round trip reproduces the input, the Latin-1 conversion is returned.
# Otherwise a notice is written to STDERR and the input is returned
# unchanged.
sub toISO($) {
    my ($text) = @_;

    # if this host was UTF-8 encoded:
    my $latin1_bytes = utf8($text)->latin1;

    # reverse check
    my $round_trip = latin1($latin1_bytes)->utf8;
    return $latin1_bytes if $text eq $round_trip;

    print STDERR "Unequal reverse check! It seems your input data \"$text\" is ",
        "ISO encoded already, so you don't need the latin1 encoding stuff here!\n";

    # Round trip failed: hand the input back as it came in.
    return $text;
}
|