250) {
    /* apply OTS */
    // Derive the summary ratio from the word count - doesn't quite work.
    // NOTE(review): the start of this condition lies outside the visible
    // text; presumably $count is a word count greater than 250 here - confirm.
    //
    // The integer cast keeps the whole part of the ratio. Cutting the string
    // at the decimal point instead yields an empty ratio whenever
    // 20000/$count is a whole number, because there is no "." to find.
    $ratio = 20000/$count;
    $ratiostring = (string)(int)$ratio;
    // The file name is shell-quoted so spaces or metacharacters in it cannot
    // alter the command line.
    exec("cat ".escapeshellarg($cleanfilename)." | ots -r ".$ratiostring, $output);
    // Fixed-ratio alternative that was tried previously: ots -r 20
} else {
    /* pass through text, it's only short */
    exec("cat ".escapeshellarg($cleanfilename), $output);
}

$abstract = implode(" ", $output); // quick & dirty
$abstract = htmlspecialchars($abstract);

// NOTE(review): the echo statements in this script print empty strings; the
// markup they were meant to emit appears to be missing - confirm against the
// original file before relying on this output.
echo '';
echo '';
echo ''.$abstract.'';

// exec() appends to an existing array, so clear the summary lines first.
unset($output);
exec("cat ".escapeshellarg($cleanfilename)." | ots -a ", $output);
$keywords = implode($output);
// Drop the first 21 characters and the last character of the ots output.
// Presumably a fixed label and closing punctuation - verify against ots.
$keywords = substr($keywords, 21);
$keywords = substr($keywords, 0, -1);
$keywordsarray = explode(",", $keywords);
foreach ($keywordsarray as $keyword) {
    $keyword = trim($keyword);
    if ($keyword !== '') {
        echo '';
    }
}
echo '';
echo '';

/*
# create RDF -
# http://purl.org/dc/terms/abstract
# dcterms:subject - URIs for keywords
*/

/**
 * Download a URL with cURL and write the response body to a local file.
 *
 * Prints a message and terminates the script if the file cannot be opened
 * for writing.
 *
 * @param string $url      Address to fetch.
 * @param string $filename Path of the file that receives the response body;
 *                         opened in 'wb' mode, so existing content is replaced.
 * @return bool True when the transfer succeeded, false when cURL could not be
 *              initialised or the transfer failed.
 */
function get_and_save($url, $filename) {
    $out = fopen($filename, 'wb');
    if ($out === false) {
        print "File " . $filename . " not opened\n";
        exit;
    }

    $ch = curl_init();
    if ($ch === false) {
        fclose($out);
        return false;
    }

    curl_setopt($ch, CURLOPT_FILE, $out);
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_URL, $url);
    $ok = curl_exec($ch);
    curl_close($ch);

    // Close the handle so it is released and the written data is complete on
    // disk before another step reads the file.
    fclose($out);

    return $ok !== false;
}

/**
 * Read $oldfile, remove its HTML markup with strip_html_tags() and write the
 * result to $newfile.
 *
 * @param string $oldfile Path of the HTML file to read.
 * @param string $newfile Path of the text file to write.
 */
function strip($oldfile, $newfile) {
    $content = file_get_contents($oldfile);
    if ($content === false) {
        // Unreadable input: hand an empty string to the tag stripper rather
        // than a boolean, and still write the output file.
        $content = '';
    }
    file_put_contents($newfile, strip_html_tags($content));
}

/**
 * Remove HTML tags, including invisible text such as style and
 * script code, and embedded objects. Add line breaks around
 * block-level tags to prevent word joining after tag removal.
 *
 * from http://nadeausoftware.com/articles/2007/09/php_tip_how_strip_html_tags_web_page
 */
function strip_html_tags( $text ) {
    $text = preg_replace(
        array(
            // Remove invisible content
            // NOTE(review): the patterns below appear to have lost their tag
            // names; compare with the article cited above before use.
            '@]*?>.*?@siu',
            '@]*?>.*?@siu',
            '@]*?.*?@siu',
            '@]*?.*?@siu',
            '@]*?.*?@siu',
            '@]*?.*?@siu',
            '@]*?.*?@siu',
            '@]*?.*?@siu',
            '@]*?.*?@siu',
            /* Add line breaks before and after blocks '@