***** infoCopter.com/perl *****
ISO / UTF-8 Conversion
UTF-8/ISO-8859-1 Conversion
UTF-8 Conversion
#!/usr/bin/perl -w
use strict;
$| = 1;    # unbuffered STDOUT

# Usage:
# ./utf8.pl utf Zürich     (input is UTF-8, output is ISO-8859-1)
# ./utf8.pl iso Zürich     (input is ISO-8859-1, output is UTF-8)

# Maps the mode given as first argument to the [ from, to ] encoding pair.
# The mode names the encoding of the INPUT string.
my %conversion_for = (
    iso => [ 'ISO-8859-1', 'UTF-8' ],
    utf => [ 'UTF-8',      'ISO-8859-1' ],
);

my ( $mode, $input ) = @ARGV;

# Reject missing/unknown arguments up front instead of comparing undef
# with "eq" (which warns under -w) and then silently printing nothing.
unless ( defined $mode && exists $conversion_for{$mode} && defined $input ) {
    print STDERR "Usage: $0 (utf|iso) STRING\n";
    exit 1;
}

my ( $from, $to ) = @{ $conversion_for{$mode} };

# decode() is defined further down in this file; the "&" form is kept so the
# call does not trigger a "called too early to check prototype" warning
# should that definition carry a prototype.
print &decode( string => $input, from => $from, to => $to ), "\n";
# decode( string => $text, from => $encoding, to => $encoding )
#
# Converts $text from one character encoding to another by running
# /usr/bin/iconv and returns whatever iconv wrote to its standard output.
#
# Named arguments:
#   string - the bytes to convert (undef is treated as the empty string)
#   from   - source encoding name, passed to "iconv -f"
#   to     - target encoding name, passed to "iconv -t"
#
# Returns the converted string; it is empty when iconv produced no output
# (for example because it rejected an encoding name).
# Dies if the temporary file cannot be written or iconv cannot be started;
# warns if iconv exits unsuccessfully.
#
# The former "(%)" prototype was dropped: it only imposed list context on the
# arguments, which is what an unprototyped sub does anyway, so every existing
# call (including "&decode(...)") behaves the same.
sub decode {
    my %args = @_;
    my $string = defined $args{'string'} ? $args{'string'} : '';

    # File::Temp picks an unpredictable name, creates the file safely and
    # removes it again when $temp_file goes out of scope.
    require File::Temp;
    my $temp_file = File::Temp->new(
        TEMPLATE => 'utf8-XXXXXX',
        TMPDIR   => 1,
        SUFFIX   => '.txt',
    );
    my $temp_name = $temp_file->filename;

    print {$temp_file} $string or die "Cannot write $temp_name: $!";
    close $temp_file           or die "Cannot close $temp_name: $!";

    # List-form pipe open runs iconv directly, without a shell, so the
    # encoding names cannot be abused to inject shell commands.
    open my $iconv, '-|', '/usr/bin/iconv',
        '-f', $args{'from'}, '-t', $args{'to'}, $temp_name
        or die "Cannot run /usr/bin/iconv: $!";

    my $decoded = do { local $/; <$iconv> };    # slurp all output

    # close() on a pipe reports the child's exit status.
    close $iconv
        or warn "/usr/bin/iconv failed"
        . ( $! ? ": $!" : " with exit status " . ( $? >> 8 ) ) . "\n";

    return defined $decoded ? $decoded : '';
}
CPAN'd Style UTF Conversion
CPAN Module: Unicode::String
#!/usr/bin/perl
use Unicode::String qw(latin1 utf8);
my $in;
$in = utf8($ARGV[1]) if $ARGV[0] eq 'iso';
$in = latin1($ARGV[1]) if $ARGV[0] eq 'utf8';
print "\nutf8 is: " . $in->utf8 . "\n";
print "\niso is: " . $in->latin1 . "\n";
Examples
./iso.pl utf8 "Zürich"
./iso.pl iso "Zürich"
Another Example
If you receive CGI data in UTF-8 format
# Storing the cookie values in 8-bit ISO (Latin-1) format is the better
# choice, so each CGI parameter is read as UTF-8 and re-encoded as Latin-1.
use Unicode::String qw(latin1 utf8);

my $lastname_utf8 = utf8( $q->param('lastname') );
my $lastname_iso  = $lastname_utf8->latin1;

my $message_utf8 = utf8( $q->param('message') );
my $message_iso  = $message_utf8->latin1;
Example script: Fault-tolerant character encoding
The following script still produces correctly ISO-encoded data even if the input data was already Latin-1 encoded instead of the expected UTF-8.
#!/usr/bin/perl -w
use strict;
use warnings;
use My::SMS;
use Unicode::String qw(latin1 utf8);

# Builds and sends an SMS: $ARGV[0] is passed as the number, $ARGV[1] as the
# message text. The text is converted to Latin-1 first; if it does not
# survive a UTF-8 -> Latin-1 -> UTF-8 round trip it is assumed to be Latin-1
# already and is sent unchanged.

my $sms = My::SMS->new( debug => 0 );

# Use the placeholder only when no text (or an empty text) was given.
# A plain "|| '*no text*'" would also replace the legitimate message "0".
my $text = ( defined $ARGV[1] && length $ARGV[1] ) ? $ARGV[1] : '*no text*';

# if this host was UTF-8 encoded:
my $text_iso  = utf8($text)->latin1;
my $text_utf8 = latin1($text_iso)->utf8;    # reverse check

if ( $text ne $text_utf8 ) {
    # Diagnostics go to STDERR so they never mix with regular output.
    print STDERR "Unequal reverse check! It seems your input data \"$text\" is ",
        "ISO encoded already, so you don't need the latin1 encoding stuff here!\n";

    # Fall back to the untouched input.
    $text_iso = $text;
}

$sms->build(
    nr         => $ARGV[0],
    text       => $text_iso,
    originator => '87654321',
    flash      => 1
);
$sms->send( quiet => 0 );
Inline Function
# Usage:
$foo = &toISO($foo);

# toISO($text)
#
# Returns $text re-encoded from UTF-8 to Latin-1 (via Unicode::String's
# utf8/latin1). The result is verified by converting it back to UTF-8: when
# the round trip does not reproduce the input, a notice is written to STDERR
# and the input is returned unchanged.
sub toISO($) {
    my ($input) = @_;

    # if this host was UTF-8 encoded:
    my $as_latin1  = utf8($input)->latin1;
    my $round_trip = latin1($as_latin1)->utf8;    # reverse check

    # Round trip reproduced the input: the conversion is trustworthy.
    return $as_latin1 if $input eq $round_trip;

    print STDERR "Unequal reverse check! It seems your input data \"$input\" is ",
        "ISO encoded already, so you don't need the latin1 encoding stuff here!\n";

    # Hand the input back untouched.
    return $input;
}
|