#!/usr/local/perl5.10.0/bin/perl

# Copyright 2008

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

use English;
use Carp qw( croak confess );
use POSIX qw(strftime);
use HTTP::Status;
use HTML::Entities;
use LWP::Simple;
use URI::Escape;
use XML::Simple;
use DBI;
use Data::Dumper;
use File::Copy;
use IO::File;

# ---------------------------------------------------------------------------
# Logging
#
# Everything is written to $log_file.  A message is emitted only when its
# level is <= $log_level (0 = errors only ... 4 = most verbose).
# ---------------------------------------------------------------------------
$log_file  = "log.txt";
$log_level = 4;

# The log handle is a lexical so that it cannot be confused with the LOG()
# subroutine defined below; three-argument open keeps the file name from
# being interpreted as a mode.
open my $log_fh, '>', $log_file
    or die "Error: can't open log file $log_file $!\n";
$log_fh->autoflush( 1 );

# Debug( $lvl, @message )
#   $lvl     - numeric level of this message.
#   @message - message parts; joined with a single space.
# Writes "<dd-Mon-yyyy hh:mm:ss> <message>\n" to the log when $lvl is within
# $log_level and the log handle is still open.  Returns nothing.
sub Debug {
    my $lvl = shift;
    if ( $lvl <= $log_level && defined( fileno( $log_fh ) ) ) {
        my $ts = strftime( "%d-%b-%Y %T", localtime( time() ) );
        print {$log_fh} "$ts @_\n";
    }
    return;
}

# Level-specific wrappers around Debug(), from least to most verbose.
sub BAD { Debug( 0, @_ ); }
sub LOG { Debug( 1, @_ ); }
sub TRC { Debug( 2, @_ ); }
sub DBG { Debug( 3, @_ ); }
sub XDB { Debug( 4, @_ ); }

# ---------------------------------------------------------------------------
# goldmine (library catalogue) settings and counters
# ---------------------------------------------------------------------------
$gm_base_search_url = "http://goldmine.uaf.edu/uhtbin/cgisirsi.exe/x/UAFRAS/x/25?library=UAFRAS&class=ALPHANUM&searchdata=dvd-";
$gm_base_direct_url = "http://goldmine.uaf.edu/uhtbin/cgisirsi.exe/x/UAFRAS/x/20/DVD-1/1/X";
$gm_found_total = 0;
$gm_found_had_already = 0;
$gm_found_current = 1; # number of items found on current goldmine page (we're done when this gets to 0)
$gm_search_num = 0;

# ---------------------------------------------------------------------------
# imdb settings and counters
# ---------------------------------------------------------------------------
$imdb_base_url = "http://www.imdb.com/find?s=tt&q=";
$imdb_found = 0;
$imdb_img_found = 0;

# ---------------------------------------------------------------------------
# rotten tomatoes settings and counters
# ---------------------------------------------------------------------------
$rt_base_url = "http://www.rottentomatoes.com";
$rt_base_url_imdb_title = "$rt_base_url/alias?type=imdbid&s=";
$rt_base_url_full_search = "$rt_base_url/search/full_search.php?search=";
$rt_found = 0;
# ---------------------------------------------------------------------------
# Remaining settings, database connection, and the main scrape loop.
#
# For every goldmine result page (MAIN_LOOP) and every 'Details' entry on it
# (GOLDMINE_SEARCH_LOOP) the script:
#   1. parses call number, title, alternate/sub title and author,
#   2. fetches the goldmine detail page for a synopsis,
#   3. looks the title up on imdb and rotten tomatoes,
#   4. asks freecovers.net for a cover image,
#   5. writes "$data_directory/<call number>.txt",
#   6. inserts node / revision / content / file / genre rows into the
#      movie* tables through $dbh (AutoCommit is off; one commit per page).
# It stops when a call number no longer looks like a DVD, or when a page
# yields no entries.
#
# FIXME(review): this copy of the file has lost text that stood between
# angle brackets.  Readline operators have been restored where the enclosing
# loop makes the handle unambiguous.  Damaged regexes and the code that was
# lost with them are marked FIXME(review) below and left exactly as found;
# they must be restored from a pristine copy before this script can run.
#
# NOTE(review): $imdb_title and $file_name are still interpolated directly
# into SQL text below; switching them to $dbh->quote() would turn an
# undefined value into NULL, so confirm the column definitions first.
# ---------------------------------------------------------------------------
$rt_img_found = 0;

$fc_base_url = "http://www.freecovers.net/api/search/";
$fc_found = 0;
$fc_img_found = 0; # initialised so the final summary never prints an empty count

$drupal_file_path = "sites/lib-dev.dcc.uaf.edu.mark_test_drupal_moviebrowser/files";
$drupal_directory = "/usr/local/apache/htdocs/mark_test_drupal/" . $drupal_file_path;

# for these two parameters, 1 means skip and 0 means do not skip(update txt file and db)
$skip_if_already_have_txt = 1;
$skip_if_already_in_db = 1;

$dbh = DBI->connect( 'dbi:mysql:drupal62', 'drupal62', 'drupal62' )
    or die "ERROR: could not conenct to database: $DBI::errstr";
$dbh->{AutoCommit} = 0;

$data_directory = "files";
mkdir( $data_directory ) unless -d $data_directory;

# Resume from the most recently modified .txt file in $data_directory, if any.
$gm_last_dvd_number = `ls -rt $data_directory/*.txt 2>/dev/null | tail -n 1`;
if( length( $gm_last_dvd_number ) > 0 ) {
    $gm_last_dvd_number =~ s:$data_directory/DVD-(\d+)\w*.txt:$1:;
    chomp $gm_last_dvd_number;
}
else {
    $gm_last_dvd_number = 0;
}
LOG( "Starting from dvd $gm_last_dvd_number" );

MAIN_LOOP: while( $gm_found_current > 0 ) {
    # goldmine starts displaying at one number less than you asked for
    # if you want to start at dvd #14 you need to request #15 (if we want #14 that means the last one we saw was #13)
    # so, we are always adding 2 to the dvd number which is why it starts at -2...
    # NOTE(review): the code below appends $gm_last_dvd_number unchanged and the
    # initial value above is 0, so the "adding 2 / starts at -2" remark looks stale.
    $gm_search_url = $gm_base_search_url . $gm_last_dvd_number;
    $gm_search_file = "goldmine-" . $gm_last_dvd_number . ".html";
    $gm_search_num += 1;
    getstore( $gm_search_url, $gm_search_file );
    BAD( "no \"$gm_search_file\" file found, getstore failed?" ) unless -f $gm_search_file;
    LOG( "got gm page $gm_search_num - getstore( $gm_search_url, $gm_search_file )" );
    open GOLDMINE_SEARCH, '<', $gm_search_file or $err = "I can't open my goldmine file \"$gm_search_file\"\n" and BAD( $err ); # and croak $err;

    $gm_found_current = 0;
    $goldmine_loop_count = 0;
    GOLDMINE_SEARCH_LOOP: while( $line = <GOLDMINE_SEARCH> ) {
        $goldmine_loop_count += 1;

        # skip ahead until we find a line with 'Details'
        next GOLDMINE_SEARCH_LOOP unless $line =~ m/Details/;

        undef $gm_call_number;
        undef $gm_title;
        undef $gm_sort_title;
        undef $gm_sub_title;
        undef $gm_author;
        undef $gm_url;
        undef $gm_id;
        undef $gm_alt_title;
        undef $gm_synopsis; # otherwise a DVD without a summary inherits the previous one's
        $gm_found_total += 1;
        $gm_found_current += 1;

        # the goldmine_link_url, which contains the call number, is 3 lines after the 'Details line' - so read the next 3 lines
        $line = <GOLDMINE_SEARCH>;
        $line = <GOLDMINE_SEARCH>;
        $line = <GOLDMINE_SEARCH>;

        # pull the call number out of the url and create a new goldmine_link_url that doesn't have the session junk in it
        # FIXME(review): the pattern below is damaged - it uses $1 but no longer
        # contains a capture group.  Left as found.
        ( $gm_id = $line ) =~ s|^\s*\.*|$1|;
        chomp $gm_id;
        $gm_url = $gm_base_direct_url . $gm_id;

        # read the next line
        $line = <GOLDMINE_SEARCH>;

        # the first item we find on the page might be different than all of the others, it has some stuff bolded
        # so, if we see something that starts with an html tag then read in the next 2 lines
        # FIXME(review): this statement survived only as
        # "if( $line =~ m/\s*; $line = ; }".  The "<" in the pattern is inferred
        # from the comment above; whatever followed it in the pattern is lost - confirm.
        if( $line =~ m/\s*</ ) {
            $line = <GOLDMINE_SEARCH>;
            $line = <GOLDMINE_SEARCH>;
        }

        # now we have the goldmine_link_text and just need to trim some whitespace off of it
        ( $gm_call_number = $line ) =~ s/^\s*(.*)\s*$/$1/;
        $gm_call_number =~ s/\.|\s*VIDEODISC//g;
        $gm_call_number =~ s/\s+/_/g;
        XDB( $gm_call_number );
        if( ! length( $gm_call_number ) ) {
            BAD( "\$gm_call_number is zero length, skipping to next goldmine search item" );
            next GOLDMINE_SEARCH_LOOP;
        }
        # NOTE(review): the second alternative contains a literal "]" and so only
        # matches call numbers beginning "DX]" - probably not what was meant; confirm.
        elsif( $gm_call_number =~ m/^DVI|^DX]/ ) {
            LOG( "\"$gm_call_number\" it looks like we started finding non-dvds... exiting" );
            # tidy up the current page before leaving; the pending inserts are
            # committed after the loop
            close GOLDMINE_SEARCH;
            unlink $gm_search_file;
            last MAIN_LOOP;
        }

        # keep track of the number of the dvd so we can make the next request to goldmine
        ( $gm_last_dvd_number = $gm_call_number ) =~ s/.*DVD-(\d+)\w*/$1/i;

        # have we already got this dvd on file?
        $info_file = "$data_directory/$gm_call_number.txt";
        if( -f $info_file && $skip_if_already_have_txt ) {
            # for now just skip ones we already have...
            $gm_found_had_already += 1;
            LOG( "\"$gm_call_number\" Skipped because it already has a .txt file" );
            next GOLDMINE_SEARCH_LOOP;
        }

        # next line might be the title/sub_title or it might be some more html tags, skip the tags until we get to the sub_title
        $line = <GOLDMINE_SEARCH>;
        if( $line =~ m/<\/a>/ ) {
            $line = <GOLDMINE_SEARCH>;
        }
        else {
            $line = <GOLDMINE_SEARCH>;
            $line = <GOLDMINE_SEARCH>;
            $line = <GOLDMINE_SEARCH>;
        }
        chomp( $line );
        XDB( "$gm_call_number (line ) ||$line||" );

        # trim whitespace and br tags from the title and sub_title
        # FIXME(review): the pattern below is damaged - a line break stands where
        # (presumably) a br tag was matched.  Left as found.
        ( $gm_title = $line ) =~ s/^\s*
(.*)[\s\[]+videorecording[\s=:\/\]]+(.*)/$1/;
        $gm_title =~ s/\s*$//;
        $gm_title =~ s/\*//g;
        ( $gm_sub_title = $line ) =~ s/.*videorecording[\]\}]?//;
        if ( $gm_sub_title =~ m/^\s*[=:]/ ) {
            # "= alternate title / sub title"
            $gm_sub_title =~ s/^\s*[:=]\s+//;
            ( $gm_alt_title = $gm_sub_title ) =~ s/(.*)\s*\/\s*(.*)/$1/;
            $gm_sub_title =~ s/(.*)\s*\/\s*(.*)/$2/;
        }
        else {
            $gm_alt_title = "";
            $gm_sub_title =~ s/^\s*\/\s*//;
        }
        XDB( "$gm_call_number (title) ||$gm_title||" );
        XDB( "$gm_call_number (sub t) ||$gm_sub_title||" );
        XDB( "$gm_call_number (alt t) ||$gm_alt_title||" );

        # remove articles from the beginning of title for sorting
        # use alternate (english) title for sorting
        # (every alternative is anchored so that "le "/"la " inside a title is left alone)
        if( length( $gm_alt_title ) < 1 ) {
            ( $gm_sort_title = $gm_title ) =~ s/^\"\s*|^an?\s+|^the\s+|^l[ea]\s+|^los\s+|^un\s+|^un[ae]\s+//i;
        }
        else {
            ( $gm_sort_title = $gm_alt_title ) =~ s/^\"\s*|^an?\s+|^the\s+|^l[ea]\s+|^los\s+|^un\s+|^un[ae]\s+//i;
        }

        # read in 2 more lines to find the author
        $line = <GOLDMINE_SEARCH>;
        $line = <GOLDMINE_SEARCH>;

        # trim whitespace and br tags from the author
        # FIXME(review): damaged in the same way as the title pattern above.  Left as found.
        ( $gm_author = $line ) =~ s/^\s*
(.*)\s*$/$1/;

        decode_entities( $gm_title );
        decode_entities( $gm_sub_title );
        decode_entities( $gm_sort_title );
        decode_entities( $gm_author );
        if( ! length( $gm_title ) ) {
            BAD( "$gm_call_number \$gm_title is zero length, skipping to next goldmine search item" );
            next GOLDMINE_SEARCH_LOOP;
        }
        XDB( "$gm_call_number \$gm_title = ||$gm_title||" );
        XDB( "$gm_call_number \$gm_alt_title = ||$gm_alt_title||" );
        XDB( "$gm_call_number \$gm_sort_title = ||$gm_sort_title||" );
        XDB( "$gm_call_number \$gm_sub_title = ||$gm_sub_title||" );
        XDB( "$gm_call_number \$gm_author = ||$gm_author||" );

        # now look at the detail page for goldmine
        $gm_exact_file = "goldmine-$gm_call_number.html";
        getstore( $gm_url, $gm_exact_file );
        if( ! -f $gm_exact_file ) {
            BAD( "$gm_call_number no \"$gm_exact_file\" file found, getstore failed?" );
        }
        else {
            LOG( "$gm_call_number exact search got: $gm_url" );
        }
        open GOLDMINE_EXACT, '<', $gm_exact_file or $err = "$gm_call_number I can't open my goldmine exact file \"$gm_exact_file\"\n" and BAD( $err ); # and croak $err;
        GOLDMINE_EXACT_LOOP: while( $line = <GOLDMINE_EXACT> ) {
            if( $line =~ m/\s*Summary:$/ ) {
                # the synopsis text is on the third line after the 'Summary:' line
                $line = <GOLDMINE_EXACT>;
                $line = <GOLDMINE_EXACT>;
                $line = <GOLDMINE_EXACT>;
                ( $gm_synopsis = $line ) =~ s/^\s*//;
                $gm_synopsis =~ s/^\"//;
                $gm_synopsis =~ s/\"$//;
                $gm_synopsis =~ s/\"/'/g; # every remaining double quote, not just the first
                chomp $gm_synopsis;
                # closing the handle makes the next read fail and so ends the loop
                close GOLDMINE_EXACT;
            }
            next GOLDMINE_EXACT_LOOP;
        }
        close GOLDMINE_EXACT;
        unlink $gm_exact_file;
        decode_entities( $gm_synopsis );
        XDB( "$gm_call_number \$gm_synopsis = ||$gm_synopsis||" );

        # alrighty then, now we have enough information to make a request to imdb
        undef $imdb_url_search;
        undef $imdb_file;
        undef $imdb_title;
        undef $imdb_url;
        undef $imdb_exact_file;
        undef $imdb_genre;
        undef $imdb_synopsis;
        undef $imdb_img_url;
        undef $imdb_img_type;
        undef $imdb_img_file;
        undef $rt_title;
        undef $rt_url;
        undef $rt_file;
        undef $rt_img_url;
        undef $rt_img_type;
        undef $rt_img_file;
        undef $rt_synopsis;
        undef $rt_url_title;
        undef $rt_file_title;
        undef $rt_file_direct;
        undef $rt_url;

        $imdb_url_search = $imdb_base_url . uri_escape( $gm_title );
        $imdb_file = "imdb-$gm_call_number.html";
        getstore( $imdb_url_search, $imdb_file );
        if( ! -f $imdb_file ) {
            BAD( "$gm_call_number no \"$imdb_file\" file found, getstore failed?" );
        }
        else {
            LOG( "$gm_call_number got: $imdb_url_search" );
        }
        open IMDB_SEARCH, '<', $imdb_file or $err = "$gm_call_number I can't open my imdb search file \"$imdb_file\"\n" and BAD( $err ); # and croak $err;
        $imdb_search_loop_count = 0;
        IMDB_SEARCH_LOOP: while( $line = <IMDB_SEARCH> ) {
            $imdb_search_loop_count += 1;
            # FIXME(review): the second pattern is damaged (an html tag stood between
            # the backslash and \Q).  Left as found.
            if( $line =~ m/Displaying / || $line =~ m/\\Q$gm_title\E/ ) {
                close IMDB_SEARCH;
                $imdb_found += 1;
                if( $line =~ m/Displaying / ) {
                    LOG( "$gm_call_number imdb title search using first match of several found" );
                    # FIXME(review): everything from inside the substitution below up to
                    # the condition of the following "exactly one exact match" branch
                    # is missing from this copy.  The two fragment lines are left as found.
                    ( $imdb_title = $line ) =~ s:^.*?\
\Q$gm_title\E/ ) {
                    $imdb_exact_file = $imdb_file; # close and then reopen the file - this is ugly
                    LOG( "$gm_call_number imdb title search found exactly one exact match" );
                }
                else {
                    $err = "$gm_call_number Error: we should never get to this code! read line \n$line\n from $imdb_url_search ($imdb_file) looking for $gm_title\n";
                    BAD( $err );
                    confess $err;
                }

                open IMDB_EXACT, '<', $imdb_exact_file or $err = "$gm_call_number I can't open my imdb exact file \"$imdb_exact_file\"\n" and BAD( $err ); # and croak $err;
                $imdb_exact_loop_count = 0;
                IMDB_EXACT_LOOP: while( $line = <IMDB_EXACT> ) {
                    $imdb_exact_loop_count += 1;
                    # FIXME(review): damaged pattern (a tag was lost, leaving a line
                    # break).  Left as found.
                    if( $line =~ m/\
/ ) {
                        $line = <IMDB_EXACT>;
                        if( $line !~ m/src/ ) {
                            $line = <IMDB_EXACT>;
                        }
                        # this will happen if our title search found only one exact match
                        if( ! defined( $imdb_title ) ) {
                            # FIXME(review): everything from inside the substitution below
                            # up to the condition of the 'Genre:' branch is missing from
                            # this copy (this is where $imdb_img_url / $imdb_img_file
                            # would be set).  The fragment line is left as found.
                            ( $imdb_title = $line ) =~ s:^.*\Genre:\<\/h5\>/ ) {
                        $line = <IMDB_EXACT>;
                        chomp( $line );
                        # NOTE(review): this pattern survived as "\]*\>"; the form below
                        # is what the surviving text and the trailing comment imply - confirm.
                        ( $imdb_genre = $line ) =~ s/\<[^\>]*\>//g; # remove html tags
                        $imdb_genre =~ s/^\s+|\s+$|\|//g; # trim whitespace, remove pipes
                        $imdb_genre =~ s/\s+/ /g; # consolidate whitespace
                        LOG( "$gm_call_number found imdb genre: \"$imdb_genre\"" );
                    }
                    # NOTE(review): this pattern survived as "\Plot:\<\/h5\>"; the opening
                    # h5 tag is inferred from the closing one - confirm.
                    elsif( $line =~ m/\<h5\>Plot:\<\/h5\>/ ) {
                        $line = <IMDB_EXACT>;
                        chomp( $line );
                        ( $imdb_synopsis = $line ) =~ s/\<.*$//; # get rid of the first html tag and everything after it...
                        LOG( "$gm_call_number found imdb synopsis" );
                        XDB( $imdb_synopsis );
                    }
                    else {
                        next IMDB_EXACT_LOOP;
                    }
                }
                XDB( "$gm_call_number imdb exact loop count = $imdb_exact_loop_count" );
                close IMDB_EXACT;
                unlink $imdb_exact_file;

                # since there was a match in imdb, we can also check rottentomatoes using the imdb title number
                # if only the library used this number instead of just making up their own number based on the
                # order in which the dvd was purchased, this would be a lot easier...
                undef $rt_url;
                undef $rt_file;
                undef $rt_img_url;
                undef $rt_img_type;
                undef $rt_img_file;
                undef $rt_synopsis;
                undef $rt_url_title;
                undef $rt_file_title;
                undef $rt_file_direct;
                undef $rt_url;

                $rt_url = "$rt_base_url_imdb_title$rt_title";
                $rt_file = "rt-$rt_title.html";
                getstore( $rt_url, $rt_file );
                if( ! -f $rt_file ) {
                    BAD( "$gm_call_number no \"$rt_file\" file found, getstore failed?" );
                }
                else {
                    LOG( "$gm_call_number got: $rt_url" );
                }
                open RT_NUMBER, '<', $rt_file or $err = "$gm_call_number I can't open my rt search by title number file \"$rt_file\"\n" and BAD( $err ); # and croak $err;
                $rt_title_loop_count = 0;
                RT_LOOP_NUMBER: while( $line = <RT_NUMBER> ) {
                    $rt_title_loop_count += 1;
                    if( $line =~ m/No results found for/ ) {
                        LOG( "$gm_call_number no rt match found" );
                    }
                    # FIXME(review): damaged pattern (a tag was lost, leaving a line
                    # break).  Left as found.
                    elsif( $line =~ m/\
/ ) {
                        LOG( "$gm_call_number possible rt image found" );
                        #XDB( $line );
                        $rt_found += 1;
                        $line = <RT_NUMBER>;
                        $line = <RT_NUMBER>;
                        #XDB( $line );
                        ( $rt_img_url = $line ) =~ s/^.*src=\"([^\"]+)\".*$/$1/;
                        chomp( $rt_img_url );
                        # the extension comes from the rt image url itself
                        ( $rt_img_type = $rt_img_url ) =~ s/^.*\.(\w+)$/$1/;
                        $rt_img_file = "$data_directory/$gm_call_number-rt.$rt_img_type";
                        DBG( "$gm_call_number \$rt_img_url = \"$rt_img_url\"" );
                        DBG( "$gm_call_number \$rt_img_file = \"$rt_img_file\"" );
                        getstore( $rt_img_url, $rt_img_file );
                        if( ! -f $rt_img_file ) {
                            BAD( "$gm_call_number no \"$rt_img_file\" file found, getstore failed?" );
                        }
                        else {
                            LOG( "$gm_call_number got: $rt_img_url" );
                            $rt_img_found += 1;
                        }
                    }
                    # FIXME(review): the branch condition and the start of the first
                    # statement of this branch are missing from this copy.  The fragment
                    # line is left as found.
                    elsif( $line =~ m/\(.*)\<\/span\>$/$1/;
                        ( $rt_synopsis = $line ) =~ s/^\s*//;
                        $rt_synopsis =~ s/\<.*?\>//g;
                        chomp( $rt_synopsis );
                        LOG( "$gm_call_number found rt sysopsis" );
                        XDB( $rt_synopsis );
                    }
                    else {
                        next RT_LOOP_NUMBER;
                    }
                }
                XDB( "$gm_call_number rt title loop count = $rt_title_loop_count" );
                close RT_NUMBER;
                unlink $rt_file;
            }
            elsif( $line =~ m/No Matches\./ ) {
                # imdb doesn't know about this dvd :(
                # since we are not going to find what we are looking for, close the file
                close IMDB_SEARCH;
                LOG( "$gm_call_number No imdb Matches for \"$gm_title\"" );
            }
            else {
                next IMDB_SEARCH_LOOP;
            }
        }
        XDB( "$gm_call_number imdb_search loop count = $imdb_search_loop_count" );

        # if the file is still open at this point then there was something unexpected in it that we didn't match
        close IMDB_SEARCH;
        unlink $imdb_file;

        # it is still possible that the movie is in rotten tomatoes but we didn't find it using the imdb title
        # so, we can also search rotten tomatoes by title, let's try that
        if( length( $rt_synopsis ) < 1 ) {
            LOG( "$gm_call_number trying rt search by title" );
            $rt_url_title = "$rt_base_url_full_search" . uri_escape( $gm_title );
            # named after the call number, like the imdb and freecovers temp files
            $rt_file_title = "rt-$gm_call_number.html";
            getstore( $rt_url_title, $rt_file_title );
            if( ! -f $rt_file_title ) {
                BAD( "$gm_call_number no \"$rt_file_title\" file found, getstore failed?" );
            }
            else {
                LOG( "$gm_call_number got: $rt_url_title" );
            }
            open RT_TITLE, '<', $rt_file_title or $err = "$gm_call_number I can't open my rt search by title file \"$rt_file_title\"\n" and BAD( $err ); # and croak $err;
            $rt_search_loop_count = 0;
            ( $rt_temp_search_title = $gm_sort_title ) =~ s/\s+/_/g;
            RT_LOOP_TITLE: while( $line = <RT_TITLE> ) {
                $rt_search_loop_count += 1;
                if( $line =~ m/No results found for/ ) {
                    LOG( "$gm_call_number no rt match found" );
                    close RT_TITLE;
                }
                #elsif( $line =~ m/\/m\/\Q$gm_title\E/i && $line =~ m/http:\/\// && $line !~ m/Search Results/ && $line !~ m/login|register/ )
                elsif( $line =~ m/\/m\/\Q$rt_temp_search_title\E/i ) {
                    LOG( "$gm_call_number possible rt exact url found" );
                    #XDB( $line );
                    close RT_TITLE;
                    # FIXME(review): this copy reads
                    #     #( $rt_url = $line ) =~ s:^.*\ ) { $rt_direct_loop_count += 1;
                    # at this point.  The code that derived $rt_url and $rt_file_direct,
                    # fetched the page, opened RT_DIRECT and reset $rt_direct_loop_count
                    # is missing.  The loop header below is inferred from the
                    # "next RT_LOOP_DIRECT" and "close RT_DIRECT" statements that follow - confirm.
                    RT_LOOP_DIRECT: while( $line = <RT_DIRECT> ) {
                        $rt_direct_loop_count += 1;
                        # FIXME(review): damaged pattern (a tag was lost, leaving a line
                        # break).  Left as found.
                        if( $line =~ m/\
/ ) {
                            LOG( "$gm_call_number posible rt image found" );
                            $rt_found += 1;
                            # read forward to the line that carries the img tag
                            while( $line = <RT_DIRECT> ) {
                                last if( $line =~ m/img\s+ src/i );
                                XDB( "$gm_call_number skipping a line in $rt_file_direct" );
                            }
                            #XDB( "\$line = ||$line||" );
                            ( $rt_img_url = $line ) =~ s/^.*src="([^"]+)".*$/$1/;
                            chomp( $rt_img_url );
                            XDB( "\$rt_img_url = ||$rt_img_url||" );
                            ( $rt_img_type = $rt_img_url ) =~ s/^.*\.(\w+)$/$1/;
                            $rt_img_file = "$data_directory/$gm_call_number-rt.$rt_img_type";
                            getstore( $rt_img_url, $rt_img_file );
                            if( ! -f $rt_img_file ) {
                                BAD( "$gm_call_number no \"$rt_img_file\" file found, getstore failed?" );
                            }
                            else {
                                LOG( "$gm_call_number got: $rt_img_url" );
                                $rt_img_found += 1;
                            }
                        }
                        # FIXME(review): the branch condition and the start of the first
                        # statement of this branch are missing from this copy.  The
                        # fragment line is left as found.
                        elsif( $line =~ m/\(.*)\<\/span\>$/$1/;
                            $rt_synopsis =~ s/^\s*//;
                            $rt_synopsis =~ s/\<.*\>//;
                            chomp( $rt_synopsis );
                            LOG( "$gm_call_number rt synopsis found" );
                            XDB( $rt_synopsis );
                        }
                        else {
                            next RT_LOOP_DIRECT;
                        }
                    }
                    XDB( "$gm_call_number rt direct loop count = $rt_direct_loop_count" );
                    close RT_DIRECT;
                    unlink $rt_file_direct;
                }
            }
            close RT_TITLE;
            unlink( $rt_file_title );
            XDB( "$gm_call_number rt search loop count = $rt_search_loop_count" );
        }

        # let's grab a cover from freecovers.net
        undef $fc_file;
        undef $fc_url;
        undef $fc_img_url;
        undef $fc_img_type;
        undef $fc_img_file;
        DBG( "$gm_call_number trying freecovers.net search by title" );
        $fc_url = "$fc_base_url" . uri_escape( $gm_title );
        $fc_file = "fc-$gm_call_number.html";
        getstore( $fc_url, $fc_file );
        if( ! -f $fc_file ) {
            BAD( "$gm_call_number no \"$fc_file\" file found, getstore failed?" );
        }
        else {
            LOG( "$gm_call_number got: $fc_url" );
            $fc_found += 1;
            # XMLin dies on malformed input; trap that and log it instead
            $fc_xml_hash = eval { XMLin( $fc_file ) };
            BAD( $@ ) if( $@ );
            if( defined( $fc_xml_hash ) ) {
                XDB( "\$fc_xml_hash is defined" );
                if( defined( $fc_xml_hash->{'err'}->{'msg'} ) ) {
                    BAD( "$gm_call_number fc search - " . $fc_xml_hash->{'err'}->{'msg'} );
                }
                elsif( defined( $fc_xml_hash->{'title'} ) ) {
                    XDB( "\$fc_xml_hash->{'title'} is defined" );
                    if( defined( $fc_xml_hash->{'title'}->{$gm_title} ) ) {
                        XDB( "\$fc_xml_hash->{'title'}->{$gm_title} is defined" );
                        if( defined( $fc_xml_hash->{'title'}->{$gm_title}->{'image'} ) ) {
                            XDB( $fc_xml_hash->{'title'}->{$gm_title}->{'image'} );
                            $fc_img_url = $fc_xml_hash->{'title'}->{$gm_title}->{'image'};
                            ( $fc_img_type = $fc_img_url ) =~ s/^.*\.(\w+)$/$1/;
                            $fc_img_file = "$data_directory/$gm_call_number-fc.$fc_img_type";
                            getstore( $fc_img_url, $fc_img_file );
                            if( ! -f $fc_img_file ) {
                                BAD( "$gm_call_number no \"$fc_img_file\" file found, getstore failed?" );
                            }
                            else {
                                LOG( "$gm_call_number got: $fc_img_url" );
                                $fc_img_found += 1;
                            }
                        }
                        else {
                            XDB( "\$fc_xml_hash->{'title'}->{$gm_title}->{'image'} is not defined" );
                        }
                    }
                    else {
                        XDB( "\$fc_xml_hash->{'title'}->{$gm_title} is not defined" );
                        for $t ( sort keys %{$fc_xml_hash->{'title'}} ) {
                            XDB( "Title contained in array: \"$t\"" );
                        }
                    }
                }
                else {
                    XDB( "\$fc_xml_hash->{'title'} is not defined" );
                }
            }
            else {
                XDB( "\$fc_xml_hash is not defined" );
            }
        }
        unlink( $fc_file );
        undef $fc_xml_hash;

        # save dvd info to file
        # Each non-empty value is written as:  $name = 'value';
        # Single quotes are escaped on a copy, so the values used for the
        # database inserts below stay untouched.
        open INFO, '>', $info_file or $err = "$gm_call_number I can't open \"$info_file\" to save dvd info\n" and BAD( $err ); # and croak $err;
        @info_fields = (
            [ gm_call_number  => $gm_call_number ],
            [ gm_title        => $gm_title ],
            [ gm_alt_title    => $gm_alt_title ],
            [ gm_sort_title   => $gm_sort_title ],
            [ gm_sub_title    => $gm_sub_title ],
            [ gm_author       => $gm_author ],
            [ gm_id           => $gm_id ],
            [ gm_url          => $gm_url ],
            [ gm_synopsis     => $gm_synopsis ],
            [ imdb_title      => $imdb_title ],
            [ imdb_genre      => $imdb_genre ],
            [ imdb_synopsis   => $imdb_synopsis ],
            [ imdb_url_search => $imdb_url_search ],
            [ imdb_url        => $imdb_url ],
            [ imdb_img_url    => $imdb_img_url ],
            [ imdb_img_file   => $imdb_img_file ],
            [ rt_title        => $rt_title ],
            [ rt_url_title    => $rt_url_title ],
            [ rt_url          => $rt_url ],
            [ rt_img_url      => $rt_img_url ],
            [ rt_img_file     => $rt_img_file ],
            [ rt_synopsis     => $rt_synopsis ],
            [ fc_img_url      => $fc_img_url ],
            [ fc_img_file     => $fc_img_file ],
        );
        foreach $info_field ( @info_fields ) {
            my ( $info_name, $info_value ) = @{$info_field};
            next unless length( $info_value ) > 0;
            ( my $info_escaped = $info_value ) =~ s|'|\\'|g;
            print INFO "\$$info_name = '$info_escaped';\n";
        }
        close INFO;

        # put it in the db
        # use this line to clear all data from the relevant mysql tables in the drupal 62 database
        # truncate table moviecontent_type_movie; truncate table movienode; truncate table movienode_revisions; truncate table moviefiles; truncate table movieterm_node; truncate table movieterm_data;
        $query = "SELECT nid FROM moviecontent_type_movie WHERE field_dvdb_call_number_value=" . $dbh->quote( $gm_call_number );
        DBG( "$gm_call_number $query" );
        $sth = $dbh->prepare( $query );
        $sth->execute();
        $result = $sth->fetchrow_hashref();
        $sth->finish();
        # the query returns only nid, so that is the column to test; on a hit
        # carry on with the next entry of the current page
        if( defined( $result->{nid} ) && $skip_if_already_in_db ) {
            LOG( "$gm_call_number \"$gm_title\" already in database as nid " . $result->{nid} );
            next GOLDMINE_SEARCH_LOOP;
        }

        # work out the next free node id and file id
        undef $next_nid;
        undef $next_fid;
        $query = "SELECT MAX(nid) + 1 AS nid, MAX(fid) + 1 AS fid FROM movienode, moviefiles";
        DBG( "$gm_call_number $query" );
        $sth = $dbh->prepare( $query );
        $sth->execute();
        $result = $sth->fetchrow_hashref();
        $sth->finish();
        if( defined( $result->{nid} ) && $result->{nid} > 1 ) {
            $next_nid = $result->{nid};
            DBG( "$gm_call_number next nid: $next_nid (from database)" );
        }
        else {
            $next_nid = 10;
            DBG( "$gm_call_number next nid: $next_nid (generated by script...)" );
        }
        if( defined( $result->{fid} ) && $result->{fid} > 1 ) {
            $next_fid = $result->{fid};
            DBG( "$gm_call_number next fid: $next_fid (from database)" );
        }
        else {
            $next_fid = 10;
            DBG( "$gm_call_number next fid: $next_fid (generated by script...)" );
        }

        $query = "INSERT INTO movienode "
            . "(nid, vid, type, language, title, uid, status, created, changed, comment, promote, moderate, sticky, tnid, translate) "
            . "VALUES ($next_nid, $next_nid, 'movie', '', " . $dbh->quote( $gm_title ) . ", 1, 1, 0, 0, 0, 1, 0, 0, 0, 0)";
        DBG( "$gm_call_number $query" );
        $sth = $dbh->prepare( $query );
        $sth->execute();
        $sth->finish();

        $query = "INSERT INTO movienode_revisions "
            . "(nid, vid, uid, title, body, teaser, log, timestamp, format) "
            . "VALUES($next_nid, $next_nid, 1, " . $dbh->quote( $gm_title ) . ", '', '', '', 0, 0)";
        DBG( "$gm_call_number $query" );
        $sth = $dbh->prepare( $query );
        $sth->execute();
        $sth->finish();

        $query ="INSERT INTO moviecontent_type_movie "
            . "(vid, nid, field_dvdb_alt_title_value, field_dvdb_sort_title_value, "
            . "field_dvdb_imdb_code_value, field_dvdb_call_number_value, field_dvdb_short_summary_value, field_dvdb_long_summary_value, "
            . "field_dvdb_cover_fid, field_dvdb_cover_list, field_dvdb_cover_data, field_dvdb_record_id_value, field_dvdb_rotten_tomatoes_code_value) "
            . "VALUES ($next_nid, $next_nid, " . $dbh->quote( $gm_alt_title ) . ", " . $dbh->quote( $gm_sort_title ) . ", "
            . "'$imdb_title', " . $dbh->quote( $gm_call_number ) . ", " . $dbh->quote( $gm_sub_title ) . ", " . $dbh->quote( $gm_synopsis ) . ", "
            . "$next_fid, '0', 'a:0:{}', " . $dbh->quote( $gm_id ) . ", " . $dbh->quote( $rt_url ) . " )";
        DBG( "$gm_call_number $query" );
        $sth = $dbh->prepare( $query );
        $sth->execute();
        $sth->finish();

        # pick a cover image: freecovers first, then rotten tomatoes, then imdb
        undef $file_name;
        if( -f $fc_img_file ) {
            ( $file_name = $fc_img_file ) =~ s/^$data_directory\///;
            XDB( "$gm_call_number using free covers image $fc_img_file" );
        }
        elsif( -f $rt_img_file ) {
            ( $file_name = $rt_img_file ) =~ s/^$data_directory\///;
            XDB( "$gm_call_number using rotten tomatoes image $rt_img_file" );
        }
        elsif( -f $imdb_img_file ) {
            ( $file_name = $imdb_img_file ) =~ s/^$data_directory\///;
            XDB( "$gm_call_number using imdb image $imdb_img_file" );
        }
        else {
            $file_name = "nopic.png";
            XDB( "$gm_call_number using default image $file_name" );
        }
        if( $file_name !~ m/^nopic.png$/ ) {
            copy( "$data_directory/$file_name", "$drupal_directory/$file_name" );
            DBG( "$gm_call_number copy( \"$data_directory/$file_name\", \"$drupal_directory/$file_name\" )" ) ;
        }

        $query = "INSERT INTO moviefiles "
            . "(fid, uid, filename, filepath, filemime, filesize, status, timestamp) "
            . "VALUES ($next_fid, 1, '$file_name', '$drupal_file_path/$file_name', '', '',1 , '')";
        DBG( "$gm_call_number $query" );
        $sth = $dbh->prepare( $query );
        $sth->execute();
        $sth->finish();

        # one taxonomy term per whitespace-separated imdb genre
        @genres = split( ' ', $imdb_genre );
        foreach $genre ( @genres ) {
            undef $next_tid;
            $query = "SELECT tid as tid FROM movieterm_data WHERE name=" . $dbh->quote( $genre );
            DBG( "$gm_call_number $query" );
            $sth = $dbh->prepare( $query );
            $sth->execute();
            $result = $sth->fetchrow_hashref();
            $sth->finish();
            if( defined( $result->{tid} ) && $result->{tid} > 0 ) {
                LOG( "$gm_call_number genre \"$genre\" already in database" );
                $next_tid = $result->{tid};
            }
            else {
                LOG( "$gm_call_number Adding genre \"$genre\" to database" );
                $query = "INSERT INTO movieterm_data "
                    . "(tid, vid, name, description, weight) "
                    . "VALUES('', 1, " . $dbh->quote( $genre ) . ", " . $dbh->quote( $genre ) . ", 0)";
                DBG( "$gm_call_number $query" );
                $sth = $dbh->prepare( $query );
                $sth->execute();
                $sth->finish();

                # read the new term back to learn the tid the database gave it
                $query = "SELECT tid as tid FROM movieterm_data WHERE name=" . $dbh->quote( $genre );
                DBG( "$gm_call_number $query" );
                $sth = $dbh->prepare( $query );
                $sth->execute();
                $result = $sth->fetchrow_hashref();
                $sth->finish();
                if( defined( $result->{tid} ) && $result->{tid} > 0 ) {
                    $next_tid = $result->{tid};
                    $query = "INSERT INTO movieterm_hierarchy "
                        . "(tid, parent) "
                        . "VALUES($next_tid, 0)";
                    DBG( "$gm_call_number $query" );
                    $sth = $dbh->prepare( $query );
                    $sth->execute();
                    $sth->finish();
                }
                else {
                    BAD( "$gm_call_number I just added genre \"$genre\" to the database, but I can't find it there now?" );
                }
            }
            $query = "INSERT INTO movieterm_node (nid, vid, tid) VALUES($next_nid, $next_nid, $next_tid)";
            DBG( "$gm_call_number $query" );
            $sth = $dbh->prepare( $query );
            $sth->execute();
            $sth->finish();
        }
    }

    # one transaction per goldmine page
    $dbh->commit();
    XDB( "goldmine loop count = $goldmine_loop_count" );
    LOG( "found $gm_found_current items on goldmine request $gm_search_num => $gm_search_url" );

    # multiple matches happen in the goldmine file so we need to read the whole thing and close after the loop ends
    close GOLDMINE_SEARCH;
    unlink $gm_search_file;
}

# "last MAIN_LOOP" leaves the loop before the per-page commit, so commit
# whatever the final page inserted before disconnecting.
$dbh->commit();
$dbh->disconnect();

LOG( "$gm_search_num goldmine pages -> $gm_found_total items, " . ( $gm_found_total - $gm_found_had_already ) . " were new" );
LOG( "$imdb_found found in imdb, $imdb_img_found had images" );
LOG( "$rt_found found in rotten tomatoes, $rt_img_found had images" );
LOG( "$fc_found found on freecovers.net, $fc_img_found had images" );