#!/usr/bin/perl
#
# swishquery.pl
# by Reto Hersiczky
# Copyright (c) 2000 by futureLAB AG
#
# CGI front end for swish-e: runs a query against a swish index and prints
# the hits as an HTML list, each with a short excerpt around the keyword.
#
# Example:
# https://team.futurelab.ch/cgi-bin/swishquery.pl?keywords=abc&swishindex=%2Fusr%2Flocal%2Fswish-e%2Findex_team_futurelab.swish
#
# CGI parameters read by this script:
#   keywords    search words handed to "swish-e -w"
#   swishindex  path of the index file handed to "swish-e -f"
#   debug       any true value prints DEBUG lines into the page
#
# NOTE(review): the HTML markup of the page templates below was reconstructed;
# only the visible text, the icon selection and the category path logic were
# available. Confirm tags, stylesheet and link targets against the deployed
# page before release.
#
# NOTE(review): 'swishindex' is taken from the request and passed to swish-e
# as the index file. It can no longer reach a shell, but any readable index
# path is still accepted; consider restricting it to a fixed directory.
#

use CGI;
use strict;
use warnings;

my $VERSION = '0.91';

# Unbuffered output. This must stay enabled before any helper process is
# forked, otherwise buffered page output would be duplicated in the child.
$| = 1;

# ----- GLOBAL VARIABLES
#
my $max_result       = 100;       # passed to swish-e -m and enforced again below
my $max_size_for_pdf = 900000;    # larger PDFs are not converted for an excerpt
my $q                = CGI->new;

# whereis swish
my $swishexec = "/usr/local/bin/swish-e";

my $keywords   = $q->param('keywords');
my $swishindex = $q->param('swishindex');
$keywords   = '' unless defined $keywords;
$swishindex = '' unless defined $swishindex;
my $debug = $q->param('debug') ? 1 : 0;

# HTML-safe copies; the raw values must never be printed into the page.
my $keywords_html   = html_escape($keywords);
my $swishindex_html = html_escape($swishindex);

# Icon shown per document type, keyed by lower-cased file name ending.
my %icon_for = (
    pdf  => "/img/ico_pdf.gif",
    txt  => "/img/ico_txt.gif",
    htm  => "/img/ico_ns.gif",
    html => "/img/ico_ns.gif",
    sdw  => "/img/ico_staroffice.gif",
    zip  => "/img/ico_zip.gif",
    tar  => "/img/ico_tar.gif",
);

######################################################################
#
# MAIN
#
######################################################################

print "Content-type: text/html\n\n";
print qq~<html>
<head>
<title>Search Result for $keywords_html - V$VERSION</title>
</head>
<body>
<h2>Search Result for $keywords_html - V$VERSION</h2>
~;
print search_form();

my $result_count  = 0;
my $match_counter = 0;

# Start swish-e without a shell so that request parameters are passed as
# plain arguments and cannot inject commands.
my $swish;
if ($keywords ne '' && $swishindex ne '') {
    $swish = open_command(0, $swishexec,
        '-f', $swishindex, '-w', $keywords, '-m', $max_result);
    print "ERROR ", html_escape("$!"), "\n" unless $swish;
}

# Skip the swish-e header up to the line carrying the number of hits.
if ($swish) {
    while (my $line = <$swish>) {
        if ($line =~ /^\# Number of/i) {
            $result_count = $1 if $line =~ /:\s*(\d+)/;
            last;
        }
    }
}

print qq~
<p>Query &laquo;$keywords_html&raquo; found $result_count matches</p>
<ol>
~;

if ($swish) {
  RESULT:
    while (my $line = <$swish>) {
        last RESULT if $line =~ /^\./;    # a single dot terminates the result list

        # Expected result line:  <rank> <path> "<title>" <size>
        # Anything else (further header or error lines) is not a hit.
        next RESULT unless $line =~ /^(\d+) (.+?) "(.*)"\s*(\d*)\s*$/;
        my ($rank, $file, $title, $size) = ($1, $2, $3, $4);

        # Scale the swish rank (0..1000) down to a percentage in 1..100.
        my $ranking = int($rank / 10);
        $ranking = 100 if $ranking > 100;
        $ranking = 1   if $ranking < 1;

        # Map the web path reported by swish onto the file system.
        my $local_file = $file;
        $local_file =~ s{^/files/}{/home/team/};

        # fix missing bytes with PDF docs: take the size from disk instead
        if ($file =~ /(?:pdf|sdw|tar)$/i) {
            my $disk_size = -s $local_file;
            $size = $disk_size if $disk_size;
        }
        $size = 0 unless length $size;

        my $imageicon = "/img/ico_unknown.gif";
        if ($file =~ /(pdf|txt|html?|sdw|zip|tar)$/i) {
            $imageicon = $icon_for{ lc $1 };
        }

        my $file_html  = html_escape($file);
        my $title_html = html_escape($title);
        print qq~
<li><img src="$imageicon" alt="$size bytes"> <a href="$file_html">$title_html</a> - $ranking % - $size bytes<br>
~;

        my $details = "";
        if ($file =~ /pdf$/i && $size > $max_size_for_pdf) {
            $details = "[ PDF document &gt; $max_size_for_pdf bytes ]";
        }
        elsif ($file =~ /gif$/i) {
            $details = "[ GIF Image ]";
        }
        else {
            Debug("N1006: build occurences...") if $debug;
            $details = build_occurrences($local_file);
        }
        print $details;

        # Category trail: drop the leading '', 'home' and 'team' path
        # elements, then link every remaining element cumulatively.
        my @f_elems = split /\//, $local_file;
        splice(@f_elems, 0, 3);
        my $link2cat = shift @f_elems;
        if (defined $link2cat) {
            my $href  = html_escape("/files/$link2cat");
            my $label = html_escape($link2cat);
            print qq~<br>\n[ <a href="$href">$label</a>~;
            foreach my $elem (@f_elems) {
                $link2cat .= "/$elem";
                $href  = html_escape("/files/$link2cat");
                $label = html_escape($elem);
                print qq~ : <a href="$href">$label</a>~;
            }
            print " ]";
        }
        print "\n</li>\n";

        last RESULT if ++$match_counter >= $max_result;
    }
    close $swish;
}

print "</ol>\n";
print search_form();
print qq~
<p>[ <a href="/">Home</a> ]</p>
</body>
</html>
~;

##################################################################
#
# SUB ROUTINES
#
##################################################################

# Build an HTML-safe excerpt of $file around the first occurrence of the
# search keywords.
#
# Parameter: $file - file system path of the document.
# Returns:   up to 700 characters of the document text, HTML-escaped,
#            starting about 70 characters before the first hit, with the hit
#            wrapped in <b>..</b>. If no keyword is found the excerpt starts
#            at the beginning and nothing is highlighted. If a plain file
#            cannot be opened, a bracketed error note is returned instead.
#
# Reads the globals $keywords and $debug.
sub build_occurrences {
    my $file = shift;

    # Only the first two words are considered; the longer one is searched
    # first (on equal length the second word wins, as before).
    my ($first, $second) = (split ' ', $keywords)[0, 1];
    my ($primary, $secondary) = ($first, $second);
    if (defined $second && length($second) >= length($first)) {
        ($primary, $secondary) = ($second, $first);
    }

    # remove asterisk anywhere (swish wildcard), then drop empty words
    my @search_terms;
    foreach my $term ($primary, $secondary) {
        next unless defined $term;
        (my $clean = $term) =~ tr/*//d;
        push @search_terms, $clean if length $clean;
    }

    Debug("N1007: search terms = '" . join("' / '", @search_terms) . "'")
        if $debug;

    # Get the document text, converting binary formats through a filter.
    my $content = "";
    if ($file =~ /pdf$/i) {
        $content = read_command_output('/usr/bin/pdftotext', $file, '-');
    }
    elsif ($file =~ /(sdw|zip|tar)$/i) {
        my $filter = "/usr/local/swish-e/filters-bin/" . lc($1) . "-filter.sh";
        $content = read_command_output($filter, $file);
    }
    else {
        # parse text or html file
        open(my $fh, '<', $file)
            or return '[ ' . html_escape("cannot read document: $!") . ' ]';
        {
            local $/ = undef;    # slurp mode, limited to this block
            $content = <$fh>;
        }
        close $fh;
        $content = "" unless defined $content;
        $content =~ s/<[^>]*>/ /g if $file =~ /html?$/i;    # Remove HTML tags
    }

    Debug("N1003: length of \$content: " . length($content)) if $debug;

    # try to locate: each word is tried in several spellings until one hits
    my $pos = -1;
    my $hit;
  SEARCH:
    foreach my $term (@search_terms) {
        foreach my $variant (keyword_variants($term)) {
            $pos = index($content, $variant);
            Debug("N1000: '$variant' found at position '$pos'") if $debug;
            if ($pos >= 0) {
                $hit = $variant;
                last SEARCH;
            }
        }
    }

    my $start = $pos - 70;    # move back NN characters
    $start = 0 if $start < 0;
    my $width = 700;          # size of partial content
    Debug("\$partial = substr(\$content, $start, $width)") if $debug;
    my $partial = substr($content, $start, $width);

    # Highlight the first occurrence. The keyword is quoted so that regex
    # metacharacters typed by the user are matched literally.
    if (defined $hit && $partial =~ /\Q$hit\E/i) {
        my ($from, $to) = ($-[0], $+[0]);
        return html_escape(substr($partial, 0, $from))
            . '<b>'
            . html_escape(substr($partial, $from, $to - $from))
            . '</b>'
            . html_escape(substr($partial, $to));
    }
    return html_escape($partial);
}

# Return the spellings of $word to look for, in search order and without
# duplicates: as typed (upper-cased for very short words, i.e. fewer than
# five characters), first letter upper case, all upper case, all lower case.
sub keyword_variants {
    my $word = shift;
    my $initial = length($word) < 5 ? uc $word : $word;
    my %seen;
    return grep { !$seen{$_}++ }
        ($initial, ucfirst(lc $word), uc $word, lc $word);
}

# Start @cmd as a child process without involving a shell and return a
# filehandle reading its standard output. With a true $quiet the child's
# standard error is sent to /dev/null. Returns nothing if fork fails.
sub open_command {
    my ($quiet, @cmd) = @_;

    my $pid = open(my $fh, '-|');    # implicit fork; child's STDOUT is the pipe
    return unless defined $pid;

    if ($pid == 0) {
        # child process
        open(STDERR, '>', '/dev/null') if $quiet;
        exec { $cmd[0] } @cmd or exit 127;
    }
    return $fh;
}

# Run @cmd (no shell, standard error discarded) and return everything it
# wrote to standard output, lines joined with a blank. Returns '' if the
# command could not be started.
sub read_command_output {
    my @cmd = @_;

    my $fh = open_command(1, @cmd) or return "";
    my @lines = <$fh>;
    close $fh;
    return join " ", @lines;
}

# Return the "Refine search" form as HTML. The form posts back to this
# script and carries the current keywords and index along.
sub search_form {
    my $action = html_escape($q->script_name());
    return qq~
<form method="get" action="$action">
Refine search:
<input type="text" name="keywords" value="$keywords_html" size="40">
<input type="hidden" name="swishindex" value="$swishindex_html">
<input type="submit" value="Search">
</form>
~;
}

# Return $text with the HTML special characters & < > " replaced by
# entities; undef yields the empty string.
sub html_escape {
    my $text = shift;
    return "" unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# Print one debug message (HTML-escaped) into the page.
sub Debug {
    my $message = html_escape($_[0]);
    print "<br>\nDEBUG: $message<br>\n";
}

# next free application codes
# E1000
# N1008