#!/usr/bin/perl -T # # pitchfork reviews project # cgi to generate concordances for a given word (or pair of words!) # # loren jan wilson, december 2003? # use strict; use CGI qw(:standard); use CGI::Carp qw(fatalsToBrowser); use DBI; use Reviews; $CGI::POST_MAX=1024 * 15; # max 20K posts $CGI::DISABLE_UPLOADS = 1; # no uploads $ENV{PATH} = ""; my $q = new CGI; print $q->header(-expires=>'now'); print $q->start_html(); my $begin_time = time(); my $dbh = make_db_handle; my $id = $q->param('id') || 100; $id =~ s/[^\d]//g; $id = substr($id, 0, 10); my $word_id = $id; my $id2 = $q->param('id2') || ""; $id2 =~ s/[^\d]//g; $id2 = substr($id2, 0, 10); my $word_id2 = $id2; my $limit = $q->param('limit') || 50; #only return 100 at a time $limit =~ s/[^\d]//g; $limit = substr($limit, 0, 4); my $offset = $q->param('offset') || 0; $offset =~ s/[^\d]//g; $offset = substr($offset, 0, 7); # get word and print it at the top my $word = dbquery($dbh, qq/ SELECT word FROM word WHERE id = $word_id /); my $word2 = "This word will never occur"; if ($id2) { $word2 = dbquery($dbh, qq/ SELECT word FROM word WHERE id = $word_id2 /); $limit = 5000; } my $wordcount = dbquery($dbh, qq/ SELECT count(word_id) FROM word_sentence WHERE word_id = $word_id /); my $firstnum = $offset+1; my $lastnum = $limit+$offset; if ($word2 eq "This word will never occur") { $word2 = " "; } print "

$word $word2

\nShowing sentences $firstnum - $lastnum; there are $wordcount total occurrences in the database."; my $new_url = $q->url(-relative=>1, -query=>1); if ($new_url =~ /offset=(\d+)/) { my $new_offset = $1 + $limit; $new_url =~ s/offset=(\d+)/offset=$new_offset/; } else { my $new_offset = $offset + $limit; $new_url = $new_url.";offset=$new_offset"; } print qq%
next $limit>>%; print "
    \n"; # get sentences and print them one at a time # sort the sentences by date and then sentence_id. # also get information from articles for each sentence..?! my @fields = qw(sentence.id sentence.beginchar sentence.length band.name album.name label.name rating_id author.id author.name article.id article.date paragraph.position paragraph.text); my $selectstring = join(",", @fields); my $sentences = dbquery($dbh,qq/ SELECT $selectstring FROM word_sentence,sentence,paragraph,article,album,band,label,author WHERE word_id = $word_id AND sentence_id = sentence.id AND paragraph_id = paragraph.id AND article_id = article.id AND album_id = album.id AND band_id = band.id AND label_id = label.id AND author_id = author.id ORDER BY date DESC, sentence_id LIMIT $offset,$limit /); # if we only have one word, there is an error...this is the workaround if (! ref $$sentences[0]) { $sentences = [ $sentences ]; } # print all the stuff... my %seen_sentence_id; my @ratings; foreach my $sentence (@$sentences) { my %sentence; if (! ref $sentence) { $sentence = [ $sentence ]; } # load values into hash using items from fields as keys foreach my $i ( 0 .. $#fields ) { $sentence{$fields[$i]} = $$sentence[$i]; } # don't print a sentence twice next if $seen_sentence_id{$sentence{'sentence.id'}}; # find out whether the second word is also in this sentence my $sentence2 = ""; if ($word_id2) { $sentence2 = dbquery($dbh, qq/ SELECT word_id FROM word_sentence WHERE word_id = $word_id2 AND sentence_id = $sentence{'sentence.id'} LIMIT 1 /); next if (! $sentence2); } # parse sentence text from paragraph.text using beginchar and length my $offset = $sentence{"sentence.beginchar"} - 1; my $length = $sentence{"sentence.length"}; my $text = $sentence{"paragraph.text"}; my $sentence_text = substr($text,$offset,$length); # highlight the word in the text $sentence_text =~ s/([^\w])${word}([^\w])/$1${word}<\/b>$2/g; $sentence_text =~ s/([^\w])${word2}([^\w])/$1${word2}<\/b>$2/g; # print band.name: album.name [label.name; rating:rating_id/100] # by author.name (author.id) on article.date ...(article.id) my $date = $sentence{"article.date"}; ($date = "date unknown") if (! $date); print "
  1. "; print qq(); print qq($sentence{"band.name"}: ); print qq($sentence{"album.name"} ); print qq([$sentence{"label.name"}; rating $sentence{"rating_id"}/100] ); print qq($sentence{"author.name"}, $date); print "
    \n"; print qq/$sentence_text (p.$sentence{"paragraph.position"})/; print "\n"; # collect this rating push @ratings, $sentence{'rating_id'}; # mark this sentence as printed and move on $seen_sentence_id{$sentence{'sentence.id'}} = 1; } # calculate average rating my $total_rating; my $num_of_ratings; foreach my $rating (@ratings) { $total_rating = $total_rating + $rating; $num_of_ratings++; } my $avg_rating = $total_rating / $num_of_ratings; print "\n
"; print "\n

average rating: $avg_rating"; my $end_time = time(); my $time_elapsed = $end_time - $begin_time; print "\n

query time: $time_elapsed seconds\n"; print $q->end_html();