number of times words appears
Input: $string - the string to analyze
$words - an array mapping word => number of times that word appears;
if there are existing values, they will be added to.
This means you can pass in a series of strings and
get the overall totals.
*/
/**
 * Tally how often each significant word occurs in a string.
 *
 * The string is split on whitespace; each token is lower-cased and stripped
 * of punctuation, and tokens that end up empty or that appear on a small
 * stop list are skipped.  Counts are added to whatever is already present
 * in $words, so the function can be called repeatedly to build up totals
 * over several strings.
 *
 * @param string $string Text to analyze.
 * @param array  $words  Existing word => count map to add to.
 *
 * @return array The updated word => count map.
 */
function countWords($string,$words) {
    // Characters stripped from every token.  The hyphen is intentionally NOT
    // in this list because hyphens inside a word may be significant.
    // The apostrophe is listed as a plain "'" (the former "\'" literal was
    // the two-character sequence backslash + apostrophe, so a lone
    // apostrophe was never removed).
    static $punctuation = array(
        ':', ';', ',', "'", '"', '(', ')', '|', '/', '?', '!', '@', '#',
        '$', '%', '^', '&', '*', '\\', '.', '+', '=', '_', '~', '`',
    );

    // Simple stop list, stored as keys so membership is a single isset().
    static $stopWords = array(
        'the' => true,
        'a'   => true,
        's'   => true,
        'of'  => true,
        'on'  => true,
        'in'  => true,
        'an'  => true,
        'if'  => true,
        'to'  => true,
        'and' => true,
    );

    // Split on runs of spaces, tabs and line breaks.  The explicit class
    // (rather than \s) keeps the split byte-safe regardless of locale, and
    // the cast lets a missing (null) string behave like an empty one.
    $tokens = preg_split('/[ \t\r\n]+/', (string) $string, -1, PREG_SPLIT_NO_EMPTY);

    foreach ($tokens as $token) {
        $word = str_replace($punctuation, '', strtolower($token));

        if ($word === '' || isset($stopWords[$word])) {
            continue;
        }

        // Initialise on first sight instead of incrementing a missing key,
        // which raises an "undefined index/array key" diagnostic.
        if (isset($words[$word])) {
            $words[$word]++;
        } else {
            $words[$word] = 1;
        }
    }

    return $words;
}
function launch()
{
global $configArray;
global $interface;
global $user;
$interface->caching = false;
if (!isset($_GET['author'])) {
PEAR::raiseError(new PEAR_Error('Unknown Author'));
} else {
$interface->assign('author', $_GET['author']);
}
// Retrieve User Search History
if (isset($_COOKIE['search'])) {
$sHistory = unserialize($_COOKIE['search']);
$lastSearch = $sHistory[count($sHistory) - 1];
$interface->assign('lastsearch', $lastSearch);
}
if (!$interface->is_cached('layout.tpl|Author' . $_GET['author'])) {
// Clean up author string
$author = $_GET['author'];
if (substr($author, strlen($author) - 1, 1) == ",") {
$author = substr($author, 0, strlen($author) - 1);
}
$author = explode(',', $author);
$interface->assign('author', $author);
$authornaf = $_GET['authornaf'];
//We'll now search to see if we can find
//a wikipedia article that seems associated with the
//author by using common title words
// Connect To Wikipedia
if (!isset($_GET['page']) || ($_GET['page'] == 1)) {
// Get records by this author
$this->db = new SOLR($configArray['SOLR']['url']);
$result = $this->db->query('authornaf:"' . $_GET['authornaf'] . '"', null, 0, 20);
/* The result will have some information about
the SOLR query and also information about
each record. Issue is this is an array of arrays,
unless there's only one result, then it's just
an array with values */
if (is_array($result['record'][0])) {
$records = $result['record'];
}
else if (is_array($result['record'])){
$records = array($result['record']);
}
$titles = array();
$words = array();
for($i = 0;$i < count($records);$i++) {
$words = $this->countWords($records[$i]['title'],$words);
}
asort($words);
/* now the words should be sorted from most frequent to least */
$words = array_keys($words);
/* now we search for the author words (from
earlier processing) and the two most common
words. Why? Some rouging testing seem to
indicate this was a good number. */
$url = "http://en.wikipedia.org/w/index.php?title=Special:Search&search=" . urlencode("$author[1] $author[0] " .array_pop($words) . " ". array_pop($words) );
//Now we examine the results.
$client = new HTTP_Request();
$client->setMethod(HTTP_REQUEST_METHOD_GET);
$client->setURL($url);
$result = $client->sendRequest();
if (!PEAR::isError($result)) {
$xmlstring = $client->getResponseBody();
}
else {
print("