#!/usr/bin/perl

# $Id: japana,v 1.39 2003/05/18 10:29:23 mastermitch Exp $

# Japana proxy
#
# 2001-2003 (C) by Christian Garbs <mitch@cgarbs.de>
#                  Benjamin Heuer <benjaminheuer@t-online.de>
#
# Licensed under GNU GPL. See COPYING for details.

# This is a simple http proxy that uses KAKASI to translate Kana and
# Kanji into Romaji on the fly.

# See http://sf.net/projects/japana/ for new versions, support etc.
# Please report bugs on the website or via mail to <japana-bugs@cgarbs.de>

use strict;
use warnings;
use AppConfig qw(:expand);
use HTTP::Daemon;
use LWP::UserAgent;
use Text::Kakasi;

# use Compress::Zlib if available
my $have_zlib;
BEGIN {
    eval { require Compress::Zlib; };
    $have_zlib = not $@;
}

=head1 NAME

japana - HTTP proxy converting Japanese characters into ASCII

=head1 SYNOPSIS

B<japana>
S<[ B<--addr> I<addr> ]>
S<[ B<--auth> ]>
S<[ B<--configfile> I<configfile> ]>
S<[ B<--kakasioptions> I<options> ]>
S<[ B<--port> I<port> ]>
S<[ B<--proxy> I<proxy> ]>
S<[ B<--userfile> I<userfile> ]>
S<[ B<--version> ]>

B<japana>
S<[ B<-a> I<addr> ]>
S<[ B<-A> ]>
S<[ B<-c> I<configfile> ]>
S<[ B<-o> I<options> ]>
S<[ B<-p> I<port> ]>
S<[ B<-P> I<proxy> ]>
S<[ B<-u> I<userfile> ]>
S<[ B<-V> ]>

=head1 OVERVIEW

japana is a small and simple proxy written in Perl.  The proxy
converts Japanese characters (Hiragana, Katakana, Kanji etc.) into
ASCII (Romaji) on the fly.  The conversion is done using the KAKASI
library.

=head1 DESCRIPTION

Just start B<japana>.  This will by default create a proxy running on
http://localhost:8080 (it will fail if something else is already
running on this port).  Then point your browser to the proxy.  Browse
some Japanese website (e.g. F<http://amazon.co.jp>) and see all those
Japanese characters converted to plain ascii text.

=head2 Switches

=over 5

=item B<--addr> I<addr> | B<-a> I<addr>

This is the IP address that japana will bind to.  This address
(together with the correct port) must be configured in your browser to
make use of the japana proxy.

Be careful: Everybody who can reach the japana port on this address
can use your proxy.  Consider enabling authentication (B<--auth>
option).  You might also bind to an address only reachable from your
local net or use a packet filter to 'guard' japana from the outside.

The address '0.0.0.0' will bind japana to all of your network
devices.

Default is to bind to address '127.0.0.1' as this address can only be
accessed from your local computer and is not accessible from the
network.  Please take care when binding to another address.

=item B<--auth> | B<-A>

This enables the "basic proxy authentication scheme" as described in
RFC 2617.  If enabled, you must enter a valid username and password
before you can use the japana proxy.  Note that the passwords are not
encrypted in any way, so don't use important ones.

Default is to use no authentication.

=item B<--configfile> I<configfile> | B<-c> I<configfile>

The options from the given configuration file will be read.  These
options can be overridden by other command line arguments.

Default is not to read a configuration file.

=item B<--kakasioptions> I<options> | B<-o> I<options>

These options are passed directly to kakasi and affect the conversion
process.  See the kakasi documentation for details.

Default options are '-ja -ga -ka -Ea -Ka -Ha -Ja -U -s' and should be
reasonable.

=item B<--port> I<port> | B<-p> I<port>

This is the port on which japana listens to your incoming requests.
This port (together with the correct address) must be configured in
your browser to make use of the japana proxy.

Default setting is port 8080.

=item B<--proxy> I<proxy> | B<-P> I<proxy>

If this variable contains a value, the given proxy is used by japana.
This allows you to chain multiple proxies together.

Example: If you need a proxy to access the Internet then point your
browser to the japana proxy and in turn point japana to your original
proxy.

Set this to 'none' to use no proxy at all.

Default is to use the environment variable ${http_proxy}.

=item B<--userfile> I<userfile> | B<-u> I<userfile>

This file contains the usernames and passwords to use when
authentication is enabled.

Default userfile is '/etc/japana.users'.

=item B<--version> | B<-V>

This prints the current version of japana and exits.

=back

=head2 Configuration file format

Configuration is also possible via configuration files.  Every command
line switch is possible in a configuration file.  Empty lines and
lines starting with B<#> are ignored.

Instead of B<--port 3128> you would put this line in the configuration file:

 port = 3128

B<-o '-ja -ga -ka -U -s'> will become

 kakasioptions = -ja -ga -ka -U -s

and so on and so forth.

=head2 Userfile file format

This file contains the usernames and passwords used for
authentication.  Every line must contain one username and the
corresponding password separated by a colon.  Empty lines and lines
starting with B<#> are ignored.

This example file contains the user 'japana' with the password
'simple':

 # This is just an example.
 # Consider changing your password before using japana.
 japana:simple

=head1 MODULES NEEDED

 use AppConfig;
 use HTTP::Daemon;
 use LWP::UserAgent;
 use Text::Kakasi;

These modules can be obtained at <F<http://www.cpan.org>> and
Text::Kakasi can be found here:
<F<http://www.daionet.gr.jp/~knok/kakasi/>>.

If setting up kakasi is too complicated, you might try the old 1.0.x
version of japana.  It does not use kakasi (and because of that can't
convert Kanji).

=head1 OPTIONAL MODULES

 use Compress::Zlib;

This module can be obtained at <F<http://www.cpan.org>>.

When this module is installed, gzipped data transfer is available
between your browser, japana and web servers.

=head1 BUGS

In the default configuration, japana supports B<NO ACCESS CONTROL!>
Everyone with access to the japana port on your system will be able to
use the proxy.  Please consider the use of password authentication
(B<--auth>) or bind japana to a port that is either only available
from your local network or protected by a packet filter.

Please report bugs the project website
<F<http://sf.net/projects/japana/>> or send a mail to
<F<japana-bugs@cgarbs.de>>.

=head1 AUTHOR

japana was written by Christian Garbs <F<mitch@cgarbs.de>>.  Look for
updates, support etc. at <F<http://sf.net/projects/japana/>>.

=head1 COPYRIGHT

japana is licensed under the GNU GPL.

=head1 THANKS

Thanks go to Tobias Diedrich <F<ranma@gmx.at>> and Benjamin Heuer
<F<benjaminheuer@t-online.de>> for patches, ideas, bug-reports and beta
testing.

Sorry to those guys from <F<news:de.soc.kultur.japan>> who helped with
the translation routine in version 1.0.x: It is not used any more.

=cut

######[ Subroutines ]
#

sub print_usage()
# prints a short help text and exits
{
    print << "EOF";

Usage: japana [options]
Supported options (long and short forms):
  -a, --addr          : set address to listen on
  -A, --auth          : enablt proxy authentication
  -c, --configfile    : set configuration file
  -h, --help          : print usage and exit
  -o, --kakasioptions : set KAKASI options
  -p, --port          : set port to listen on
  -P, --proxy         : set proxy to use
  -u, --userfile      : set username and password file
  -v, --version       : print version number and exit
EOF
;
    exit 0;
}

sub send_proxyauth() 
# a 407 response which tells the client that authentication is needed
{
    my $header = HTTP::Headers->new(proxy_authenticate => 'Basic realm="japana"');
    my $response = HTTP::Response->new("407", "Proxy Authentication Required", $header);
    return $response;
}    

######[ Main program ]
#
   
my $CVSVERSION = do { my @r = (q$Revision: 1.39 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
my $VERSION = "2.0.5";
print "this is japana ${VERSION}/${CVSVERSION}\n";

# define configuration options
my $config = AppConfig->new( { CASE => 1 } );
$config->define( 'configfile|c=s',    { DEFAULT => '' } );
$config->define( 'userfile|u=s',      { DEFAULT => '/etc/japana.users' } );
$config->define( 'addr|a=s',          { DEFAULT => '127.0.0.1' } );
$config->define( 'port|p=s',          { DEFAULT => '8080' } );
$config->define( 'proxy|P=s',         { DEFAULT => $ENV{'http_proxy'}, EXPAND => EXPAND_ENV } );
$config->define( 'kakasioptions|o=s', { DEFAULT => '-ja -ga -ka -Ea -Ka -Ha -Ja -U -s' } );
$config->define( 'version|V!' );
$config->define( 'help|h!' );
$config->define( 'auth|A!' );

# Another config file might be given on command line, so process a copy of ARGV
$config->getopt( qw(no_ignore_case), [ @ARGV ]);

# if we are to just print the version number, then quit now
exit if $config->version();

# if we are to just print help, then do it now
print_usage() if $config->help();

# read config file, if existent and desired
if ($config->configfile() ne "") {
    if (-r $config->configfile()) {
	print "- reading options from `".$config->configfile()."'\n";
	$config->file($config->configfile());
    } else {
	warn "can't read configuration file `".$config->configfile()."': $!\nusing built-in defaults\n";
    }
}

# override config file with command line arguments
$config->getopt( qw(no_ignore_case), [ @ARGV ] );

# if we are to just print the version number, then quit now
exit if $config->version();

# if we are to just print help, then do it now
print_usage() if $config->help();

# read the password file and save it in a hash (user - password pairs)
my %auth;
if ($config->auth()) {
    print "- authentication enabled, using `".$config->userfile()."'\n";
    open PASS, '<', $config->userfile() or die "can't open userfile `".$config->userfile()."': $!";
    while (my $line = <PASS>) {
        chomp $line;
	$line =~ s/\r$//; # evil DOS linebreaks
	
        next if $line =~ /^#/;
	next if $line =~ /^\s*$/;
	
        my ($user, $pass) = split /:/, $line;
	warn "duplicate username `${user}' in userfile!\n" if exists $auth{$user};
        $auth{$user} = $pass;
    }
    close PASS or die "can't close userfile `".$config->userfile()."': $!";
}

# create kakasi connection
my $res = Text::Kakasi::getopt_argv('kakasi', split /\s+/, $config->kakasioptions());
print "- kakasi options: `".$config->kakasioptions()."'\n";

# using gzip?
# TODO: expand this to the other means of compression as well
my $accept_encoding = "identity";
if ($have_zlib) {
    $accept_encoding = "gzip, identity;q=0.5";
    print "- using zlib compression\n";
}

# create proxy
my $proxy = HTTP::Daemon->new(
			      LocalAddr=>$config->addr(),
			      LocalPort=>$config->port()
			      );
die "@_" unless defined $proxy;

# create UserAgent
my $ua = LWP::UserAgent->new;
$ua->agent("japana ${VERSION}/${CVSVERSION}");

if ( defined $config->proxy()
     and $config->proxy() ne ""
     and $config->proxy() ne "none" ) {
    print "- using existing proxy on ".$config->proxy()."\n";
    $ua->proxy('http', $config->proxy());
}

print "- proxy started on ".$config->addr().":".$config->port()."\n";

# Don't accumulate zombies
# (we don't care about our children
#  -> possible SIGPIPES when browser aborts request)
$SIG{CHLD} = 'IGNORE';

while (my $conn = $proxy->accept) {
    if (! fork()) {
	# CHILD
	while (my $request = $conn->get_request) {
	    my $response;

	    # check if the proxy_authorization header is set (only when authentication is enabled),
	    # if not send a 407 to the client
	    if($config->auth and ! $request->proxy_authorization) {
	        $response = send_proxyauth();
	    } else {   

	    	# ok, the proxy_authorization header is there (or authentication is disabled),
		# now check the user and pass (if necessary)
	        my ($user, $pass) = $request->proxy_authorization_basic;

	        if (! $config->auth or (defined $auth{$user} and $auth{$user} eq $pass)) {

		    # remember if gzip was requested
		    my $gzip_requested = ( $request->header('Accept-Encoding') and 
					   grep /^gzip(;q=\d+(\.\d+)?)?/, split /,\s/, $request->header('Accept-Encoding') );

		    # Do/don't request compressed data
		    #
		    # TODO: We assume that Accept-Encoding identity is
		    # always supported by the client.  The original
		    # Accept-Encoding is simply overwritten.  This
		    # should of course be changed in the future!
		    $request->header('Accept-Encoding' => $accept_encoding);

		    # do the HTTP request
	            $response = $ua->simple_request($request);

		    # is this response gzipped?
		    my $currently_gzipped = (defined $response->header('Content-Encoding') and
					     $response->header('Content-Encoding') eq "gzip");

		    my $converted = 0;
	            # check if content type is text and do the conversion
	            if ( lc substr ($response->content_type(), 0, 5) eq "text/" ) {

			# decompress data for conversion
			if ( $currently_gzipped ) {
			    $response->remove_header('Content-Encoding');
			    $response->content( Compress::Zlib::memGunzip( $response->content() ) );
			}
			
			# do conversion
		        $response->content( Text::Kakasi::do_kakasi( $response->content() ) );
			$converted = 1;

			# recompress data after conversion
			if ( $currently_gzipped and $gzip_requested ) {
			    $response->header('Content-Encoding' => 'gzip');
			    $response->content( Compress::Zlib::memGzip( $response->content() ) );
			};
		    }

		    # decompress data if compressed and no compression requested
		    if ( $currently_gzipped and not $gzip_requested ) {
			$response->remove_header('Content-Encoding');
			$response->content( Compress::Zlib::memGunzip( $response->content() ) );
			$currently_gzipped = 0;
		    }
			
		    # print log message
		    # TODO: more flexible logging, activate debug via configuration variable
	            printf 
#			"%s%s%s  %s\t%s\n" ,
			"%s  %s\t%s\n" ,
#			$gzip_requested    ? 'g' : ' ' ,
			$converted         ? 'c' : ' ' ,
#			$currently_gzipped ? 'g' : ' ' ,
			$response->content_type() ,
			$request->uri() ;

	        }
		# if the user/pass don't match then send another 407 and try again
		else {
		  $response = send_proxyauth();
		}
	    }	
	    $conn->send_response($response);
	    exit;
	}
    }
    $conn->close;
}
