#!/usr/bin/perl -T
# ms2pdf, Andrew Daviel, TRIUMF, 2008
#
# CGI front end that fetches an "office" document with wget, converts it with
# DocumentConverter.py (OpenOffice) and returns the result as PDF, HTML or
# text.  Fetched and converted files are kept under ./soffice and reused
# while the converted copy is at least as new as the fetched one.
#
# NOTE(review): the copy of this file that was reviewed had been through a
# markup stripper: everything between a '<' and the following '>' was lost.
# The spans below were therefore RECONSTRUCTED and must be checked against
# the original before deployment:
#   - every heredoc body (HTTP headers and HTML tags of all pages),
#   - the response to an unsupported request method,
#   - form decoding, output-type selection and the "no URL given" case,
#   - the URL validation pattern (rebuilt from the "Bad URL" message),
#   - the condition guarding the "Error $status $reason" page,
#   - the response headers sent in front of a converted document.
# The original also parsed Content-Type and WWW-Authenticate from the wget
# output, computed the file suffix, and had a disabled
# "if ($status == 401 and 0)" branch; nothing that survived used them, so
# they are omitted here.

use strict;
use warnings;
use Time::Local;

$ENV{PATH} = '/bin:/usr/bin';                # need wget
delete @ENV{qw(IFS CDPATH ENV BASH_ENV)};    # taint mode: clean env before exec

# Submit-button label => output file suffix.
my %sfx = (
    HTML => 'html',
    Text => 'txt',
    PDF  => 'pdf',
);

# Output file suffix => MIME type.
my %type = (
    html => 'text/html',
    txt  => 'text/plain',
    pdf  => 'application/pdf',
);

my $oopython = "/opt/openoffice.org2.4/program/python";

my $method = defined $ENV{'REQUEST_METHOD'} ? $ENV{'REQUEST_METHOD'} : '';
my $query  = defined $ENV{'QUERY_STRING'}   ? $ENV{'QUERY_STRING'}   : '';
my $len    = defined $ENV{'CONTENT_LENGTH'} ? $ENV{'CONTENT_LENGTH'} : '';
my $script = defined $ENV{'SCRIPT_NAME'}    ? $ENV{'SCRIPT_NAME'}    : '';
my $raddr  = defined $ENV{'REMOTE_ADDR'}    ? $ENV{'REMOTE_ADDR'}    : '';
my $now    = localtime();

my $ims = defined $ENV{'HTTP_IF_MODIFIED_SINCE'}
    ? $ENV{'HTTP_IF_MODIFIED_SINCE'}
    : '';
$ims =~ s/;.*//;    # for Netscape, which appends "; length=..."
$ims = get_gmtime($ims);

# Logging is best effort: a missing log must not break the conversion, but
# the failure is reported to the server error log instead of being ignored.
my $log_fh;
unless (open($log_fh, '>>', 'logs/ms2pdf.log')) {
    warn "$0: cannot append to logs/ms2pdf.log: $!\n";
    undef $log_fh;
}

# ---- read the request ------------------------------------------------------

my $buffer = '';
if ($method eq 'POST' and $len =~ /\A\d+\z/ and $len > 0) {
    read(STDIN, $buffer, $len);
}
elsif ($method eq 'GET') {
    $buffer = $query;
}
else {
    # NOTE(review): the original text of this branch was lost.
    error_page('Unsupported request method', $method,
        '405 Method Not Allowed');
    exit;
}

my %F = parse_form($buffer);

# NOTE(review): the name of the form field carrying the output type was
# lost.  Any field other than "url" whose value is one of the %sfx labels is
# accepted, so the result does not depend on the guessed name; PDF is
# assumed as the default.
my $otype = $sfx{PDF};
for my $name (sort keys %F) {
    next if $name eq 'url';
    if (exists $sfx{ $F{$name} }) {
        $otype = $sfx{ $F{$name} };
        last;
    }
}

# NOTE(review): presumably a request without a URL shows the form - confirm.
unless (defined $F{url} and length $F{url}) {
    head();
    foot();
    exit;
}

# Capturing from the match also untaints the URL.  It is later handed to
# wget as a single argv element, never through a shell.
my ($url) = $F{url} =~ m{\A((?:https?|ftp)://[^\x00-\x20\x7f]+)\z};
unless (defined $url) {
    print "Content-Type: text/html\n\n";
    print "<html><head><title>Bad URL</title></head><body>\n";
    print "<h1>Bad URL \"", html_escape($F{url}),
        "\" - must start with http[s]:// or ftp://</h1>\n";
    foot();
    log_line("\"$F{url}\"");
    exit;
}

# ---- fetch -----------------------------------------------------------------

chdir 'soffice' or die "$0: cannot chdir to soffice: $!\n";

my ($status, $reason, $ifile, $err) = (0, '', undef, '');

# wget reports everything of interest on stderr; open_cmd_pipe merges it
# into the pipe.
my $wget = open_cmd_pipe('wget', '-N', '-T', '15', '-x', '-S', '--', $url);
while (defined(my $line = <$wget>)) {
    chomp $line;
    if ($line =~ / HTTP\/\d\.\d\s+(\d+) (.*)/) {
        ($status, $reason) = ($1, $2);    # last response wins after redirects
    }
    # NOTE(review): this is the "=> `file'" line of older wget releases;
    # newer ones print "Saving to: ..." instead - verify against the
    # installed wget.
    if ($line =~ / => `(.*)'/) {
        $ifile = $1;
    }
    if ($line =~ /\. failed: /) {
        $err = $line;
    }
}
close($wget);
my $wget_failed = $?;

if ($wget_failed and $status == 0) {
    error_page("Retrieve error $err", $url);
    log_line("$url $err");
    exit;
}

# NOTE(review): the original condition was lost; ">= 400" is an assumption.
if ($status >= 400) {
    error_page("Error $status $reason", $url);
    log_line("$url $status $reason");
    exit;
}

# The file name comes from wget's output; refuse anything that could leave
# the cache directory, and stop here if nothing was saved (the original went
# on to run the converter with an empty output name).
if (   !defined $ifile
    or $ifile =~ m{\A/}
    or $ifile =~ m{(?:\A|/)\.\.(?:/|\z)}
    or !-f $ifile)
{
    error_page("Retrieve error $err", $url);
    log_line("$url $err");
    exit;
}

my @src_stat = stat($ifile);
my $fmod1    = $src_stat[9];
my $ofile    = escape($ifile) . '.' . $otype;

if ($ims and $fmod1 and $fmod1 <= $ims) {
    print "Status: 304 Not Modified\n\n";
    log_line("$url 304");
    exit;
}

# ---- serve from cache if the converted copy is current ----------------------

my ($fmod2, $fsize2) = (0, 0);
if (-f $ofile) {
    my @out_stat = stat($ofile);
    ($fmod2, $fsize2) = @out_stat[9, 7];
}

if ($fmod1 and $fmod2 and $fmod2 >= $fmod1 and $fsize2) {
    log_line("$url cached $ofile " . wtime($fmod2));
    send_file($ofile, $type{$otype}, $fsize2, $fmod2);
    exit;
}

# ---- convert -----------------------------------------------------------------

$err = '';
my $conv =
    open_cmd_pipe($oopython, '../DocumentConverter.py', $ifile, $ofile);
while (defined(my $line = <$conv>)) {
    chomp $line;
    $err = $line if $line =~ /ERROR/;
}
close($conv);

if ($?) {
    error_page("Conversion error $err", $url);
    log_line("$url convert error $err");
    exit;
}

die "$0 create $ofile failed\n" unless -f $ofile;

# Give the converted file the source's timestamps (formerly "touch -r") so
# the cache comparison above keeps working.
utime($src_stat[8], $fmod1, $ofile)
    or warn "$0: cannot set times on $ofile: $!\n";

$fsize2 = (stat($ofile))[7];
log_line("$url created $ofile " . wtime($fmod1));
send_file($ofile, $type{$otype}, $fsize2, $fmod1);
exit;

# Netscape:
# Reload sends no-cache and IMS
# Shift-Reload sends no-cache without an IMS

# Format an epoch time as an HTTP date, e.g. "Sat, 01 Mar 2008 02:37:00 GMT".
sub wtime {
    my $time = shift;
    my @dow = qw(Sun Mon Tue Wed Thu Fri Sat);
    my @moy = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
    my ($sec, $min, $hour, $mday, $mon, $year, $wday) = gmtime($time);
    return sprintf("%s, %02d %s %04d %02d:%02d:%02d %s",
        $dow[$wday], $mday, $moy[$mon], $year + 1900,
        $hour, $min, $sec, 'GMT');
}

# Parse the "DD Mon YYYY HH:MM:SS GMT" part of an HTTP date and return
# seconds since the epoch.  Returns 0 when the string does not match, the
# month name is unknown, or a field is out of range.
sub get_gmtime {
    my ($date) = @_;
    return 0 unless defined $date;

    # Built inside the sub: file-level initialisers placed after the main
    # program's final "exit" would never run.
    my %month_index;
    @month_index{qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)}
        = (0 .. 11);

    return 0
        unless $date =~ /(\d+) (\w{3}) (\d{4}) (\d+):(\d+):(\d+) GMT/;
    my ($day, $mn, $yr, $hr, $min, $sec) = ($1, $2, $3, $4, $5, $6);
    return 0 unless exists $month_index{$mn};

    # timegm() dies on out-of-range values; a malformed client header must
    # not turn into a server error.
    my $epoch =
        eval { timegm($sec, $min, $hr, $day, $month_index{$mn}, $yr) };
    return defined $epoch ? $epoch : 0;
}

# Percent-encode every byte that is not an ASCII letter or digit (lower-case
# hex), so that a fetched path becomes a flat file name.
sub escape {
    my $str = shift;
    $str =~ s/([\x00-\x2F\x3A-\x40\x5B-\x60\x7B-\xFF])/sprintf("%%%02x", ord($1))/ge;
    return $str;
}

# Escape text for inclusion in HTML content or a double-quoted attribute.
sub html_escape {
    my $text = shift;
    $text = '' unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# Decode an application/x-www-form-urlencoded string into a name => value
# list.  When a name repeats, the last value wins.
sub parse_form {
    my ($raw) = @_;
    my %field;
    return %field unless defined $raw;
    for my $pair (split /[&;]/, $raw) {
        my ($name, $value) = split /=/, $pair, 2;
        next unless defined $name and length $name;
        $value = '' unless defined $value;
        for ($name, $value) {
            tr/+/ /;
            s/%([0-9A-Fa-f]{2})/chr(hex($1))/ge;
        }
        $field{$name} = $value;
    }
    return %field;
}

# Run a command without a shell and return a handle reading its combined
# stdout and stderr.  The caller closes the handle and then inspects $?.
sub open_cmd_pipe {
    my @cmd = @_;
    my $pid = open(my $out, '-|');
    die "$0: cannot fork for $cmd[0]: $!\n" unless defined $pid;
    if ($pid == 0) {
        # Child: stdout is already the pipe; send stderr there too.
        open(STDERR, '>&STDOUT') or exit 127;
        exec { $cmd[0] } @cmd
            or print STDERR "ERROR: cannot exec $cmd[0]: $!\n";
        exit 127;
    }
    return $out;
}

# Append one line, prefixed with time, client address and request method,
# to the log.  Control characters are blanked so that request data cannot
# forge extra log lines.
sub log_line {
    my ($text) = @_;
    return unless $log_fh;
    my $line = "$now $raddr $method $text";
    $line =~ tr/\x00-\x1f\x7f/ /;
    print {$log_fh} "$line\n";
    return;
}

# Print a complete HTML error page.  $status, when given, is sent as the CGI
# "Status:" header.
# NOTE(review): the markup is reconstructed; only the heading and detail
# texts survived.
sub error_page {
    my ($heading, $detail, $status) = @_;
    my $h = html_escape($heading);
    print "Status: $status\n" if defined $status;
    print "Content-Type: text/html\n\n";
    print "<html><head><title>$h</title></head><body>\n";
    print "<h1>$h</h1>\n";
    print "<p>", html_escape($detail), "</p>\n"
        if defined $detail and length $detail;
    print "</body></html>\n";
    return;
}

# Send a converted document with its response headers.
# NOTE(review): the original header block was lost; these three headers are
# inferred from the values the script computed just before printing (MIME
# type, file size, formatted modification time).
sub send_file {
    my ($file, $ctype, $size, $mtime) = @_;
    open(my $in, '<', $file) or die "$0: cannot read $file: $!\n";
    binmode $in;
    binmode STDOUT;
    print "Content-Type: $ctype\n";
    print "Content-Length: $size\n";
    print "Last-Modified: ", wtime($mtime), "\n\n";
    local $/ = \65536;    # fixed-size reads; PDF is not line oriented
    while (defined(my $chunk = <$in>)) {
        print $chunk;
    }
    close($in);
    return;
}

# wget -r -l inf -N
# -q quiet  -O xx output  -N time-stamp
# --ignore-length  --header=additional-header
# wget --ignore-length --header=if-modified-since:"Sat, 01 Mar 2008 02:37:00 GMT" -N -O foo.html -nv http://andrew.triumf.ca 2>&1 |less

#application/msword doc
#application/vnd.ms-powerpoint ppt
#application/vnd.ms-excel xls
#application/vnd.oasis.opendocument.text odt
#application/vnd.sun.xml.writer sxw
#application/vnd.sun.xml.calc sxc

#Authorization: Basic <redacted>
# NOTE(review): a real base64 user:password pair was committed here; it has
# been removed and the account's password should be changed.

# Print the HTTP header and the top of the form page.
# NOTE(review): the markup, the form action and the field names are
# reconstructed; the visible text is unchanged.
sub head {
    my $action = html_escape($script);
    print <<"EOT";
Content-Type: text/html

<html><head><title>Document Converter</title></head>
<body>
<h1>Document Converter</h1>
<p>
Convert "office" documents to PDF or HTML using OpenOffice server, including Microsoft Word, Excel, PowerPoint, OpenOffice and StarOffice documents and presentations.
</p>
<form method="GET" action="$action">
<p><input type="text" name="url" size="60"></p>
<p>Get as
<input type="submit" name="otype" value="PDF">
<input type="submit" name="otype" value="HTML">
<input type="submit" name="otype" value="Text">
</p>
</form>
<p>
N810 hint (Firefox 3): Make a new window for this page. Tap-and-hold the document link on the original page, then copy the URL to the clipsheet. Switch to this page and paste the URL in the box above (Ctrl-V).
</p>
EOT
    return;
}

# Print the notes and close the page opened by head() or by the "Bad URL"
# response.
# NOTE(review): the markup is reconstructed; the link target of the OOONinja
# article was lost and needs to be restored.
sub foot {
    print <<'EOT';
<p>Notes:</p>
<ul>
<li>The conversion requires OpenOffice running on the server; there is currently no assurance that it will be restarted if it dies.</li>
<li>Conversion and upload may take several seconds, or longer for large documents</li>
<li>Not all conversion pairs are supported</li>
</ul>
<p>
Derived from the OOONinja article by Andrew Ziem.
</p>
<address>
Andrew Daviel &lt; advax (at) triumf.ca &gt;
</address>
</body></html>
EOT
    return;
}