Search Results

#!/usr/bin/perl
# HOP ON POP
#
# BY LUKE PRITCHETT AND DR. SEUSS
#
# This program searches an XML database of elementary school books
# The user can use a basic search that searches in all of the information
# about the book
# The Advanced Search keys restrict the search results that are returned to the 
# user depending on if there's a match or not
# The program then parses information about the book and outputs a list of retuned books.
#
# 2/5/06


use CGI;
use CGI::Carp qw(warningsToBrowser fatalsToBrowser);
use strict;
use warnings;

my $database = "sample.xml";

my $cgi = CGI->new;

# Get the variables from the form
my $key = $cgi->param('key');
my $owner = $cgi->param('owner');
my @continuum = $cgi->param('continuum');
my $gradelevel = $cgi->param('gradelevel');
my $fplevel = $cgi->param('fplevel');
my $genrelong = $cgi->param('genre');

my @genres;

if ($genrelong)
{
	@genres = split(/, /, $genrelong);
}

print $cgi->header;
print $cgi->start_html();
print "<h1>Search Results</h1>\n&nbsp&nbsp\n";

open (READ, $database);
flock(READ, 2);
my @books = <READ>;
close READ;

# Remove the first and last lines, which aren't books.
shift(@books);
pop(@books);


# BIG NOTE:
# LARGE problem with inputs. Entering regex will cause wierd results,
# Doing other wierd stuff will crash crash crash.

# the $advanced variable signifies how many
# advanced searches the user supplied
my $advanced = 0;
if ($owner)
{
	$advanced++;
}
if (@continuum)
{
	$advanced++;
}
if (@genres)
{
	$advanced++;
}
if ($fplevel)
{
	$advanced++;
}
if ($gradelevel)
{
	$advanced++;
}


# @matches is all of the books that match the basic search keywords
# If key is defined, do a basic search, if there's no basic search but
# there is an advanced search put ALL of the books into the @matches array
# so we can refine it later. If there's neither, matches stays empty.
my @matches;
my $relevance;
if ($key)
{
	foreach (@books)
	{
		if (($relevance = &searchkeys($key, $_)) >= 1)
		{
				push(@matches, $relevance . $_);
		}
	}
}
elsif ($advanced > 0)
{
	foreach (@books)
	{
		push(@matches, "0" . $_);
	}
}

my @advmatches;
my $advmatch;

# If there are advanced variables supplied do the advanced searches, adding
# matches to the new @advmatches array. If there's no advanced search, @advmatches
# becomes @matches directly.
if ($advanced > 0)
{
	foreach (@matches)
	{
		$advmatch = 0;
		
		# if an advanced variable is being used and no keys from it are found, we add to advmatches
		if ($owner && &searchowner($_, $owner) >= 1) { $advmatch++; }
		if (@continuum && &searchcontinuum($_, @continuum) >= 1) {$advmatch++; }
		if ($fplevel && &searchfp($_, $fplevel) >= 1) {$advmatch++; }
		if ($gradelevel && &searchgradelevel($_, $gradelevel) >= 1) { $advmatch++; }
		
		# If there was at least one match for each advanced search
		if ($advmatch >= $advanced)
		{
			push(@advmatches, $_);
		}
	}
}
else
{
	@advmatches = @matches;
}

# Sort the matches by relevance (it's the number we added to the beginning of the string)
@advmatches = reverse sort @advmatches;

if (@advmatches)
{
	foreach (@advmatches)
	{
		&parsebook($_);
	}
}
else
{
	print "Couldn't find any matches for your search. Try refining your search.\n";
}

print "<a href=\"/search.html\">Search Again</a>\n";
print "</body>\n</html>";

#---------------------------------------------------
# -- searchkeys --
# Splits up the key string from the basic search into words and searches for them.
# We pass (key, text). The key still has spaces, we split it here
# into individual words. The words then get searched for.
# It's a really simple search, but it will work. Unfortunately,
# it will also find a lot of things it's not supposed to.
#
# ARG[0] - The keyword string to be split up.
# ARG[1] - The text to be searched
sub searchkeys
{
	my $keyline = shift(@_);
	my $text = shift(@_);
	
	# Is the word 'keys' reserved? How odd.
	my @allkeys = split(/\W+/, $keyline);
	
	# Go through the list of keys
	my $matches = 0;
	foreach my $key (@allkeys)
	{
		# We have to make sure that the user can't find
		# words that are in tags. That would be bad
		while ($text =~ /[^<\/]$key[^>]/ig)
		{
			# The more matches we find, the higher the relevance
			$matches++;
		}
	}
	return $matches;
}
#-----------------------------------------------------------------
# -- searchowner -- 
# Searches for a specific owner (a grade as a whole)
# There can only be one grade searched for at a time
#
# ARG[0] The text of the book
# ARG[1] The key string.
sub searchowner
{
	my $book = shift(@_);
	my $grade = shift(@_);

	if ($book =~ /<owner>.*[^<\/]$grade[^>].*<\/owner>/i)
	{
		return 1;
	}
	else
	{
		return 0;
	}
}

#-----------------------------------------------------------------
# -- searchlocation -- 
# Searches for a location.
# Split and then make sure that every key is found in the text
# A bit harsh, but the only way to do it.
# ARG[0] The text of the book
# ARG[1] The key string.
sub searchlocation
{
	my $book = shift(@_);
	my $longkey = shift(@_);
	
	# commented out because we need more complicated splitting
	# or syntax
	my @allkeys = split(/\W+/, $longkey);
	my $numkeys = @allkeys;
	
	my $keysfound = 0;
	my $matches = 0;
	foreach $key (@allkeys)
	{
		if ($book =~ /<owner>.*[^<\/]$longkey[^>].*<\/owner>/i)
		{
			$keysfound++;
		}
	}
	
	if ($keysfound == $numkeys)
	{
		return 1;
	}
	else
	{
		return 0;
	}
}

#-----------------------------------------------------------------
# -- searchcontinuum --
# Advanced search. only returns books that have the specified
# continuum things.
# ARG[0] - The text of the book to be searched
# ARG[1] - The array of continuum things. (It comes in in an array)

sub searchcontinuum
{
	my $book = shift(@_);
	my @allkeys = @_;
	
	foreach $key (@allkeys)
	{
		if ($book =~ /<userlookup2><displayname>.*$key.*<\/userlookup2>/i)
		{
			return 1;
		}
	}
	return 0;
}


#-----------------------------------------------------------------
# -- searchgradelevel --
# Advanced search. Returns 1 for books that have the same first digit in their
# grade level as the number given by the user.
# ARG[0] - The text of the book to be searched
# ARG[1] - The number (hopefully) that the user supplied

sub searchgradelevel
{
	my $book = shift(@_);
	my $key = shift(@_);

	if ($book =~ /<gradelevel>$key\./i)
	{
		return 1;
	}
	else
	{
		return 0;
	}
}

#--------------------------------------------------------------------
# searchfp
#
# Returns 1 for books that are rated with the fp rating that
# the user selected. returns 0 otherwise
#
# ARG[0] - The book text
# ARG[1] - The key
sub searchfp
{
	my $book = shift(@_);
	my $key = shift(@_);
	
	if ($book =~ /<userlookup1><displayname>$key/i)
	{
		return 1;
	}
	else
	{
		return 0;
	}
}



# -----------------------------------------------------------------
# Parses and outputs (correctly) information about a book.
# The only argument is the book to be parsed 
sub parsebook
{
	my $book = shift(@_);
	
	my $title;
	my $relevance;
	my $location;
	my $plot;
	my $isbn;
	my $owner;
	my $continuum;
	my $fp;
	
	#---------------------------
	# HERE BE PARSING
	
	# We will probably want to add something to make keys that we found boldfaced
	# in the output. This could just be a matter of doing s/$key/<b>$key</b>/ before
	# parsing, but that seems hackish and I have a feeling it won't be that simple
	
	# Parse relevance. It will always be at the beginning
	if($book =~ /^([0-9]+)/)
	{
		$relevance = $1;
	}
	
	# Parse title
	if($book =~ /<title>(.+)<\/title>/)
	{
		$title = $1;
	}

	# Parse location
	if($book =~ /<location>.*<displayname>(.+)<\/displayname>.*<\/location>/)
	{
		$location = $1;
	}
	
	# Parse authors
	my @authors;
	while ($book =~ /<person><displayname>(.+?)<\/displayname>/g)
	{
		push(@authors, $1);
	}
	
	# Parse plot
	if($book =~ /<plot>(.+)<\/plot>/)
	{
		$plot = $1;
	}
	
	# Parse ISBN
	if($book =~ /<isbn>([0-9]+)<\/isbn>/)
	{
		$isbn = $1;
	}

	# Parse continuum
	if ($book =~ /<userlookup2>.*<displayname>(.+)<\/displayname>.*<\/userlookup2>/)
	{
		$continuum = $1;
	}

	# Parse Owner
	if ($book =~ /<owner>.*<displayname>(.+)<\/displayname>.*<\/owner>/)
	{
		$owner = $1;
	}

	# Parse FP
	if ($book =~ /<userlookup1>.*<displayname>(.+)<\/displayname>.*<\/userlookup1>/)
	{
		$fp = $1;
	}

	
	#-------------------------------------------------------------
	# HERE BE OUTPUT
	
	#print  $title, " -- Relevance: ", $relevance, "\n";
	#print "<img>", 
	print "<b>", $title, "</b> - <i>";
	print join ", ", @authors;
	print "\n\n";
	# Fun little hack to get rid of the trailing comma. :P
	print "</i>&nbsp\n";
	
	print "<p>\n\t", $plot, "\n</p>&nbsp\n";
		
	print "<i>ISBN</i> - ", $isbn, "<br>";
		
	print "<i>location</i> - ", $location, "</i>\n<br>\n";
	
	
	print "Owner: ", $owner, "<br>\n";
	print "Continuum: ", $continuum, "<br>\n";
	print "FP: ", $fp, "\n<br>\n";
	print "<br><br>\n";
}