<?php
/**
 *
 * Copyright (C) Villanova University 2007.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */
 
require_once 'Action.php';

require_once 
'HTTP/Request.php';
require_once 
'Pager/Pager.php';

/**
 * Author "home" page action.
 *
 * Shows the records attributed to an author and, on the first result page,
 * tries to enrich the page with a short biography and image reference taken
 * from the English Wikipedia. The Wikipedia article is guessed by searching
 * for the author's name together with the two most frequent words in the
 * titles of the author's records.
 *
 * Request parameters read: $_GET['author'], $_GET['authornaf'],
 * $_GET['page'], and the 'search' cookie.
 */
class Home extends Action
{
    /** @var object SOLR index connection, created in launch(). */
    private $db;

    /**
     * Tally how often each word occurs in a string.
     *
     * The string is split on single spaces; each token is lower-cased and
     * stripped of punctuation. Hyphens are deliberately kept because they
     * may be significant inside a word. Empty tokens and a small stop list
     * of very common English words are ignored.
     *
     * Running totals can be built by passing the result of one call as
     * $words to the next call.
     *
     * @param string $string The string to analyze.
     * @param array  $words  Existing totals (word => count) to add to.
     *
     * @return array word => number of times the word appears
     */
    public function countWords($string, $words)
    {
        // The original list contained "\'" (backslash + apostrophe, because
        // \' is not an escape inside double quotes), so bare apostrophes
        // were never removed. A plain apostrophe is what was intended.
        $punctuation = array(
            ':', ';', ',', "'", '"', '(', ')', '|', '/', '?', '!', '@', '#',
            '$', '%', '^', '&', '*', "\\", '.', '+', '=', '_', '~', '`',
        );

        // Simple stop list.
        $stopWords = array(
            'the', 'a', 's', 'of', 'on', 'in', 'an', 'if', 'to', 'and',
        );

        // Nothing sensible can be counted in an array, object or null.
        if (!is_scalar($string)) {
            return $words;
        }

        foreach (explode(' ', (string) $string) as $word) {
            $word = str_replace($punctuation, '', strtolower($word));

            if ($word === '' || in_array($word, $stopWords, true)) {
                continue;
            }

            // Start new words at 1 instead of incrementing an undefined key.
            if (isset($words[$word])) {
                $words[$word]++;
            } else {
                $words[$word] = 1;
            }
        }

        return $words;
    }

    /**
     * Build and display the author page.
     *
     * Assigns the template variables 'author', 'lastsearch', 'info',
     * 'recordSet' and 'pager', then renders 'layout.tpl' with 'home.tpl'
     * as the page template.
     *
     * @return void
     */
    public function launch()
    {
        global $configArray;
        global $interface;

        $interface->caching = false;

        $authorParam = null;
        if (isset($_GET['author']) && is_string($_GET['author'])) {
            $authorParam = $_GET['author'];
        }
        $authorNaf = '';
        if (isset($_GET['authornaf']) && is_string($_GET['authornaf'])) {
            $authorNaf = $_GET['authornaf'];
        }
        $page = isset($_GET['page']) ? $_GET['page'] : '';
        $onFirstPage = !isset($_GET['page']) || ($_GET['page'] == 1);

        if ($authorParam === null) {
            PEAR::raiseError(new PEAR_Error('Unknown Author'));
            // Keep going with an empty name if the error handler returns.
            $authorParam = '';
        } else {
            $interface->assign('author', $authorParam);
        }

        // Retrieve User Search History
        if (isset($_COOKIE['search'])) {
            // NOTE(review): unserialize() on cookie data is unsafe because
            // the client controls it. Consider storing the history as JSON
            // or server-side.
            $sHistory = unserialize($_COOKIE['search']);
            if (is_array($sHistory) && isset($sHistory[count($sHistory) - 1])) {
                $interface->assign('lastsearch', $sHistory[count($sHistory) - 1]);
            }
        }

        // Get records by this author. The same result feeds both the
        // Wikipedia lookup and the record list, so it is fetched only once.
        $result = $this->queryAuthorRecords($authorNaf);

        // The cache id must be identical for is_cached() and display().
        $cacheId = 'Author|' . $authorParam;

        if (!$interface->is_cached('layout.tpl', $cacheId)) {
            // Clean up author string: drop one trailing comma, then split
            // "Last, First" into its parts.
            $author = $authorParam;
            if (substr($author, -1) === ',') {
                $author = substr($author, 0, -1);
            }
            $author = explode(',', $author);
            $interface->assign('author', $author);

            // Only the first result page carries the Wikipedia box.
            if ($onFirstPage) {
                $info = $this->lookupWikipediaInfo(
                    $author,
                    $this->extractRecords($result)
                );
                $interface->assign('info', $info);
            }
        }

        if (isset($result['record']['id'])) {
            // Hack for issue with XML_Serializer: a single record arrives
            // as a bare record instead of a list of records.
            $result['record'] = array($result['record']);
        }
        $interface->assign(
            'recordSet',
            isset($result['record']) ? $result['record'] : null
        );

        // Build the pager link template from the current request URI.
        $uri = $_SERVER['REQUEST_URI'];
        if (strpos($uri, 'page=') !== false) {
            $link = str_replace('page=' . $page, '', $uri) . 'page=%d';
        } else {
            $link = $uri . '&page=%d';
        }

        // NOTE(review): the record query always starts at offset 0, so
        // every page appears to show the same 20 records - confirm against
        // the SOLR class.
        $options = array(
            'totalItems' => isset($result['RecordCount']) ? $result['RecordCount'] : 0,
            'mode' => 'sliding',
            'path' => '',
            'fileName' => $link,
            'delta' => 5,
            'perPage' => 20,
            'nextImg' => 'Next &raquo;',
            'prevImg' => '&laquo; Prev',
            'separator' => '',
            'spacesBeforeSeparator' => 0,
            'spacesAfterSeparator' => 0,
            'append' => false,
            'clearIfVoid' => true,
            'urlVar' => 'page',
            'curPageSpanPre' => '<span>',
            'curPageSpanPost' => '</span>',
        );
        $pager = Pager::factory($options);
        $interface->assign('pager', $pager);

        $interface->setTemplate('home.tpl');
        $interface->display('layout.tpl', $cacheId);
    }

    /**
     * Query the index for the first 20 records of an author.
     *
     * @param string $authorNaf Author authority value to search for.
     *
     * @return mixed Whatever SOLR::query() returns.
     */
    private function queryAuthorRecords($authorNaf)
    {
        global $configArray;

        // The value is embedded in a quoted phrase, so backslashes and
        // double quotes must be escaped or the request could alter the query.
        $phrase = str_replace(
            array('\\', '"'),
            array('\\\\', '\\"'),
            $authorNaf
        );

        $this->db = new SOLR($configArray['SOLR']['url']);
        return $this->db->query('authornaf:"' . $phrase . '"', null, 0, 20);
    }

    /**
     * Turn a query result into a list of records.
     *
     * The result holds an array of records, unless there is only one
     * record, in which case it holds that record directly.
     *
     * @param mixed $result Result of queryAuthorRecords().
     *
     * @return array List of records (possibly empty).
     */
    private function extractRecords($result)
    {
        if (!isset($result['record']) || !is_array($result['record'])) {
            return array();
        }
        if (isset($result['record'][0]) && is_array($result['record'][0])) {
            return $result['record'];
        }
        return array($result['record']);
    }

    /**
     * Try to find a Wikipedia article about the author and summarize it.
     *
     * The lookup is best effort: any failure (request error, no matching
     * search hit, unexpected response) yields null and the page is shown
     * without the Wikipedia box.
     *
     * @param array $author  Name parts from "Last, First" (index 0 = last).
     * @param array $records Records by the author; their titles supply the
     *                       extra search words.
     *
     * @return array|null Keys 'name', 'image', 'description', or null.
     */
    private function lookupWikipediaInfo($author, $records)
    {
        $lastName = isset($author[0]) ? $author[0] : '';
        // Names without a comma have no second part.
        $firstName = isset($author[1]) ? $author[1] : '';

        $words = array();
        foreach ($records as $record) {
            if (isset($record['title'])) {
                $words = $this->countWords($record['title'], $words);
            }
        }

        // asort() orders by ascending count, so the most frequent words end
        // up last and array_pop() returns them first.
        asort($words);
        $words = array_keys($words);
        $mostCommon = array_pop($words);
        $secondMostCommon = array_pop($words);

        // Search for the author name plus the two most common title words.
        // Some rough testing seemed to indicate two was a good number.
        $searchUrl = "http://en.wikipedia.org/w/index.php?title=Special:Search&search="
            . urlencode("$firstName $lastName " . $mostCommon . " " . $secondMostCommon);

        $html = $this->wikiHttpGet($searchUrl);
        if (!is_string($html) || $html === '') {
            return null;
        }

        $articlePath = $this->findWikipediaArticleLink($html, $firstName, $lastName);
        if ($articlePath == '') {
            return null;
        }

        // Drop the leading "/wiki/" (6 characters) to get the article title.
        $title = substr($articlePath, 6);
        $apiUrl = 'http://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&format=php&titles=' . $title;

        $response = $this->wikiHttpGet($apiUrl);
        if (!is_string($response) || $response === '') {
            return null;
        }

        return $this->parseWikipediaResponse($response);
    }

    /**
     * Fetch a URL with an HTTP GET request.
     *
     * @param string $url Address to request.
     *
     * @return string|null Response body, or null when the request failed.
     */
    private function wikiHttpGet($url)
    {
        $client = new HTTP_Request();
        $client->setMethod(HTTP_REQUEST_METHOD_GET);
        $client->setURL($url);

        $status = $client->sendRequest();
        if (PEAR::isError($status)) {
            return null;
        }
        return $client->getResponseBody();
    }

    /**
     * Pick the first search hit whose link text contains the author's name.
     *
     * A hit qualifies when its text contains every non-empty name part,
     * compared case-insensitively after removing '.' and ','. This could
     * definitely be improved.
     *
     * @param string $html      Wikipedia search result page.
     * @param string $firstName First-name part of the author.
     * @param string $lastName  Last-name part of the author.
     *
     * @return string The hit's href value, or '' when nothing matches.
     */
    private function findWikipediaArticleLink($html, $firstName, $lastName)
    {
        $needles = array();
        foreach (array($firstName, $lastName) as $part) {
            $part = trim(str_replace(array('.', ','), '', $part));
            if ($part !== '') {
                $needles[] = $part;
            }
        }
        if (count($needles) === 0) {
            // With no name to compare, every hit would "match".
            return '';
        }

        // Real-world HTML is rarely well formed; collect the parser
        // warnings quietly instead of suppressing them with @.
        $previous = libxml_use_internal_errors(true);
        $document = new DOMDocument();
        $loaded = $document->loadHTML($html);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);
        if (!$loaded) {
            return '';
        }

        // NOTE(review): this path depends on the exact markup of the
        // Wikipedia search page - verify it still matches.
        $xpath = new DOMXPath($document);
        $query = '/html/body/div[@id="globalWrapper"]/div[@id="column-content"]/div[@id="content"]/div[@id="bodyContent"]/ul[1]/li/a';
        $links = $xpath->query($query);
        if (!$links) {
            return '';
        }

        foreach ($links as $link) {
            $matchesAll = true;
            foreach ($needles as $needle) {
                // stripos() returns false when absent and 0 for a match at
                // the start, so compare against false explicitly.
                if (stripos($link->nodeValue, $needle) === false) {
                    $matchesAll = false;
                    break;
                }
            }
            if ($matchesAll) {
                return $link->getAttribute('href');
            }
        }

        return '';
    }

    /**
     * Convert a Wikipedia API response (format=php) into template data.
     *
     * @param string $serialized Raw response body.
     *
     * @return array|null Keys 'name', 'image', 'description'; null when the
     *                    article does not exist or the response is malformed.
     */
    private function parseWikipediaResponse($serialized)
    {
        // NOTE(review): unserialize() on a remote response trusts the
        // remote server; format=json with json_decode() would be safer.
        $data = unserialize($serialized);
        if (!is_array($data)
            || !isset($data['query']['pages'])
            || !is_array($data['query']['pages'])
        ) {
            return null;
        }

        // Check if data exists or not: a page keyed -1 means "not found".
        $pages = $data['query']['pages'];
        if (!empty($pages['-1'])) {
            return null;
        }

        $page = array_shift($pages);
        if (!isset($page['title'], $page['revisions']) || !is_array($page['revisions'])) {
            return null;
        }

        $revision = array_shift($page['revisions']);
        if (!isset($revision['*'])) {
            return null;
        }

        $summary = $this->extractWikiSummary(explode("\n", $revision['*']));

        $info = array();
        $info['name'] = $page['title'];
        $info['image'] = $summary['image'];
        $info['description'] = $summary['description'];
        return $info;
    }

    /**
     * Extract the lead section and an image reference from wiki markup.
     *
     * Leading blank lines and lines starting with "[[", "{{", "}}", "]]"
     * or "| " are skipped as preamble; while skipping, the last line that
     * mentions an image ending in .jpg or .gif supplies the image
     * reference. Everything from there up to the first line starting with
     * "==" becomes the description, with wiki links turned into HTML links
     * and {{...}} templates removed.
     *
     * @param array $lines Article source, one line per element.
     *
     * @return array Keys 'image' (string or null) and 'description'.
     */
    private function extractWikiSummary($lines)
    {
        $preamblePrefixes = array('[[', '{{', '}}', ']]', '| ');
        $image = null;

        // Both loops stop when the lines run out; otherwise an article
        // without body text or without a "==" heading would loop forever.
        while (count($lines) > 0) {
            $line = $lines[0];

            if ($line == '') {
                array_shift($lines);
                continue;
            }
            if (!in_array(substr($line, 0, 2), $preamblePrefixes, true)) {
                break;
            }

            // Position 0 counts as "not found" here, as a preamble line
            // always begins with one of the prefixes above.
            $start = stripos($line, "image:");
            if (!$start) {
                $start = stripos($line, "image");
            }
            if ($start) {
                $extensionLength = 4;
                $end = stripos($line, ".jpg");
                if (!$end) {
                    $end = stripos($line, ".gif");
                }
                // An extension before the marker would give a negative or
                // meaningless length.
                if ($end && $end > $start) {
                    $image = substr($line, $start, $end + $extensionLength - $start);
                }
            }

            array_shift($lines);
        }

        $description = "";
        while (count($lines) > 0 && substr($lines[0], 0, 2) != "==") {
            $description .= array_shift($lines);
        }

        // Create links to wikipedia
        $pattern = array();
        $replacement = array();
        // [[Target]]
        $pattern[] = '/(\x5b\x5b)([^\x5d|]*)(\x5d\x5d)/';
        $replacement[] = '<a href="http://en.wikipedia.org/wiki/$2">$2</a>';
        // [[Target|Label]]
        $pattern[] = '/(\x5b\x5b)([^\x5d]*)\x7c([^\x5d]*)(\x5d\x5d)/';
        $replacement[] = '<a href="http://en.wikipedia.org/wiki/$2">$3</a>';
        // Removes citation
        $pattern[] = '/({{)[^}]*(}})/';
        $replacement[] = "";

        $description = preg_replace($pattern, $replacement, $description);

        return array('image' => $image, 'description' => $description);
    }
}

?>