***** infoCopter.com/perl *****

ISO / UTF-8 Conversion

UTF-8/ISO-8859-1 Conversion

UTF-8 Conversion

#!/usr/bin/perl -w
use strict;
$| = 1;

# Converts the string given on the command line between ISO-8859-1 and UTF-8
# and prints the result followed by a newline.
#
# Usage:
# ./utf8.pl utf Zürich     (input is UTF-8,      output is ISO-8859-1)
# ./utf8.pl iso Zürich     (input is ISO-8859-1, output is UTF-8)

# Maps the mode (first argument) to its [source, target] encoding pair.
my %conversion_for = (
    iso => [ 'ISO-8859-1', 'UTF-8'      ],
    utf => [ 'UTF-8',      'ISO-8859-1' ],
);

my ($mode, $string) = @ARGV;

# Refuse to run without a known mode and a string instead of comparing
# undefined values and silently printing nothing.
die "Usage: $0 utf|iso <string>\n"
    unless defined $mode
        && defined $string
        && exists $conversion_for{$mode};

my ($from, $to) = @{ $conversion_for{$mode} };

# decode() is defined further down with a prototype; calling it with a
# leading & keeps this call independent of that prototype.
print &decode( string => $string,
               from   => $from,
               to     => $to ), "\n";

# Converts a string from one character encoding to another by running it
# through /usr/bin/iconv.
#
# Named arguments:
#   string - the data to convert
#   from   - source encoding name (passed to iconv -f)
#   to     - target encoding name (passed to iconv -t)
#
# Returns everything iconv wrote to standard output as one string (the empty
# string if it wrote nothing). Dies if the temporary input file cannot be
# created or written, or if iconv cannot be started; warns if iconv exits
# with a non-zero status.
sub decode (%) {
	require File::Temp;

	my %args = @_;

	# File::Temp creates the file under an unpredictable name and removes it
	# again when $tmp goes out of scope, so no file is left behind.
	my $tmp = File::Temp->new(
		TEMPLATE => 'utf8-XXXXXXXX',
		SUFFIX   => '.txt',
		TMPDIR   => 1,
	);

	print {$tmp} $args{'string'}
		or die "Cannot write to " . $tmp->filename . ": $!\n";
	# Close before running iconv so the data is flushed to disk.
	close $tmp
		or die "Cannot close " . $tmp->filename . ": $!\n";

	# List-form pipe open runs iconv directly, without a shell, so the
	# encoding names cannot be interpreted as shell syntax.
	open(my $iconv, '-|', '/usr/bin/iconv',
		'-f', $args{'from'},
		'-t', $args{'to'},
		$tmp->filename)
		or die "Cannot run /usr/bin/iconv: $!\n";

	my $decoded = do { local $/; <$iconv> };

	# close() on a pipe returns false when the command failed; $! is only
	# set for a genuine close error, otherwise the exit status is in $?.
	unless (close $iconv) {
		warn $!
			? "Error closing pipe from /usr/bin/iconv: $!\n"
			: "/usr/bin/iconv exited with status " . ($? >> 8) . "\n";
	}

	return defined $decoded ? $decoded : '';
}


CPAN'd Style UTF Conversion

CPAN Module: Unicode::String

#!/usr/bin/perl
use strict;
use warnings;

use Unicode::String qw(latin1 utf8);

# Reads a mode and a string from the command line, builds a Unicode::String
# object from the string, and prints it in both UTF-8 and Latin-1 form.
my ($mode, $string) = @ARGV;

# Without a known mode no object would be built and the method calls below
# would die on an undefined value, so stop early with a usage message.
die "Usage: $0 utf8|iso <string>\n"
    unless defined $mode
        && defined $string
        && ($mode eq 'iso' || $mode eq 'utf8');

my $in;

# 'iso' reads the argument as UTF-8; 'utf8' reads it as Latin-1.
# NOTE(review): presumably the mode names the desired output encoding rather
# than the input encoding - confirm.
$in = utf8($string)    if $mode eq 'iso';
$in = latin1($string)  if $mode eq 'utf8';

print "\nutf8 is: " . $in->utf8 . "\n";

print "\niso is: " . $in->latin1 . "\n";

Examples

./iso.pl utf8 "Zürich"
./iso.pl iso "Zürich"

Another Example

If you receive CGI data in UTF-8 format

# It is better to store the cookie values in 8-bit ISO format.
# scalar() makes sure exactly one value per field is handed to utf8(), even
# if the field was submitted several times or not at all.
# NOTE(review): assumes $q is a CGI-style query object whose param() is
# context-sensitive - confirm against the surrounding code.
use Unicode::String qw(latin1 utf8);
my $lastname_iso = (utf8(scalar $q->param('lastname')))->latin1;
my $message_iso  = (utf8(scalar $q->param('message' )))->latin1;

Example script: Fault-tolerant character encoding

The following script produces correctly ISO-8859-1 (Latin-1) encoded data even if the input was already Latin-1 encoded instead of UTF-8 encoded as expected.

#!/usr/bin/perl -w
use strict;
use warnings;

use My::SMS;
use Unicode::String qw(latin1 utf8);

# Sends the text given as second argument to the number given as first
# argument, converting the text from UTF-8 to Latin-1 first unless it
# appears to be Latin-1 already.

my $sms = My::SMS->new( debug => 0 );

my ($recipient, $text) = @ARGV;

# A message without a recipient cannot be delivered, so stop here.
die "Usage: $0 <number> [text]\n"
	unless defined $recipient && length $recipient;

# Use the placeholder only when no text was given at all; a plain "0" is a
# valid message and must not be replaced.
$text = '*no text*' unless defined $text && length $text;

# if this host was UTF-8 encoded:
my $text_iso  = (utf8($text))->latin1;
my $text_utf8 = (latin1($text_iso))->utf8; # reverse check

# If converting back does not reproduce the input, the input is treated as
# already being Latin-1 and is passed on unchanged.
if ($text ne $text_utf8) {
	print "Unequal reverse check! It seems your input data \"$text\" is ",
		"ISO encoded already, so you don't need the latin1 encoding stuff here!\n";
	# I'm going to fix this
	$text_iso = $text;
}

$sms->build(
	nr   => $recipient,
	text => $text_iso,
	originator => '87654321',
	flash => 1
);

$sms->send(quiet => 0);


Inline Function

# Usage:
$foo = &toISO($foo);

# Returns the Latin-1 form of a string that is expected to be UTF-8 encoded.
# The string is converted to Latin-1 and back again; if that round trip does
# not reproduce the input, a notice is written to STDERR and the input is
# returned unchanged.
# Relies on utf8() and latin1() being in scope (presumably imported from
# Unicode::String - confirm at the place this sub is pasted into).
sub toISO($) {
        my ($input) = @_;

        # Convert as if the input were UTF-8, then convert back for comparison.
        my $as_latin1  = (utf8($input))->latin1;
        my $round_trip = (latin1($as_latin1))->utf8;

        # Round trip reproduces the input: the conversion result is usable.
        return $as_latin1 if $input eq $round_trip;

        print STDERR "Unequal reverse check! It seems your input data \"$input\" is ",
                "ISO encoded already, so you don't need the latin1 encoding stuff here!\n";

        # Hand the input back untouched.
        return $input;
}
© reto :)