mikebyrne Posted August 12, 2009 Share Posted August 12, 2009 I've written a piece of code to convert a pdf doc to a txtfile (still not working) but at present the pdf doc is predefined within the code. How can I recode this so the user has the option to select any chosen file?? my code is
<?php
// Function    : pdf2txt()
// Arguments   : $filename - Filename of the PDF you want to extract
// Description : Reads a pdf file, extracts data streams, and manages
//               their translation to plain text - returning the plain
//               text at the end
// Authors     : Jonathan Beckett, 2005-05-02
//             : Sven Schuberth, 2007-03-29
//
// Usage       : the file to convert is always passed in by the caller, e.g.
//                   pdf2txtFile($chosenPdfPath, $targetTxtPath);
//               Nothing in this file hard-codes a path any more.

/**
 * Extracts the plain text of a PDF file.
 *
 * Files whose header announces PDF 1.2 are decoded with handleV2(); every
 * other version goes through handleV3().
 *
 * @param string $filename Path of the PDF to read.
 *
 * @return string The extracted text (empty string when nothing was found).
 *
 * @throws RuntimeException When the file cannot be read.
 */
function pdf2txt($filename)
{
    $data = getFileData($filename);

    // Only the start of the file is inspected for the "%PDF-x.y" header, so
    // an empty or header-less file simply falls through to handleV3().
    $header = (string) substr($data, 0, 1024);
    if (strpos($header, '%PDF-1.2') !== false) {
        return handleV2($data);
    }

    return handleV3($data);
}

/**
 * Converts a PDF to text and stores the result in a text file.
 *
 * This replaces the unreachable file_put_contents() calls that used to sit
 * after the return statements with hard-coded paths.
 *
 * @param string $filename Path of the PDF to read.
 * @param string $output   Path of the text file to write.
 *
 * @return string The text that was written.
 *
 * @throws RuntimeException When the PDF cannot be read or the text file
 *                          cannot be written.
 */
function pdf2txtFile($filename, $output)
{
    $text = pdf2txt($filename);
    if (file_put_contents($output, $text) === false) {
        throw new RuntimeException('Unable to write text file: ' . $output);
    }

    return $text;
}

/**
 * Handles PDF version 1.2: decodes FlateDecode streams and extracts the
 * text operands from the decoded content.
 *
 * @param string $data Raw contents of the PDF file.
 *
 * @return string Concatenated text of all decodable streams.
 */
function handleV2($data)
{
    $result_data = '';

    // grab objects and then grab their contents (chunks)
    foreach (getDataArray($data, 'obj', 'endobj') as $obj) {
        $a_filter = getDataArray($obj, '<<', '>>');
        if ($a_filter === []) {
            continue;
        }

        $stream = getStreamData($obj);
        if ($stream === '') {
            continue;
        }

        // look at the filter to find out which encoding has been used;
        // only FlateDecode (zlib) streams are supported
        if (strpos($a_filter[0], 'FlateDecode') === false) {
            continue;
        }

        // Best effort: streams that are not valid zlib data are skipped, so
        // the warning gzuncompress() raises for them is deliberately muted.
        $decoded = @gzuncompress($stream);
        if ($decoded !== false && trim($decoded) !== '') {
            $result_data .= ps2txt($decoded);
        }
    }

    return $result_data;
}

/**
 * Returns the bytes between the "stream" keyword and "endstream" of one
 * PDF object.
 *
 * The keyword must be followed by CRLF or a single LF; other occurrences of
 * the word "stream" are ignored.
 *
 * @param string $obj One object as returned by getDataArray().
 *
 * @return string The raw stream bytes, or an empty string when the object
 *                has no complete stream.
 */
function getStreamData($obj)
{
    $keyword_len = strlen('stream');
    $offset = 0;

    while (($pos = strpos($obj, 'stream', $offset)) !== false) {
        $body_start = $pos + $keyword_len;
        if (substr($obj, $body_start, 2) === "\r\n") {
            $body_start += 2;
        } elseif (substr($obj, $body_start, 1) === "\n") {
            $body_start += 1;
        } else {
            // not a stream keyword, keep looking
            $offset = $pos + $keyword_len;
            continue;
        }

        $body_end = strpos($obj, 'endstream', $body_start);
        if ($body_end === false) {
            return '';
        }

        return (string) substr($obj, $body_start, $body_end - $body_start);
    }

    return '';
}

/**
 * Handles PDF versions other than 1.2: collects every "(...)" string found
 * in objects that reference the /GS1 graphics state.
 *
 * @param string $data Raw contents of the PDF file.
 *
 * @return string Concatenated contents of the matched strings.
 */
function handleV3($data)
{
    $result_data = '';

    // grab objects and then grab their contents (chunks)
    foreach (getDataArray($data, 'obj', 'endobj') as $obj) {
        // check if it is a string object
        if (substr_count($obj, '/GS1') === 0) {
            continue;
        }

        // the strings are between ( and )
        if (preg_match_all('|\((.*?)\)|', $obj, $fields, PREG_SET_ORDER)) {
            foreach ($fields as $field) {
                $result_data .= $field[1];
            }
        }
    }

    return $result_data;
}

/**
 * Extracts the text operands "(...)" from decoded page content.
 *
 * Strings inside "[...]" arrays are preferred; when the content has no
 * arrays at all, strings are taken from the raw data instead.
 *
 * @param string $ps_data Decoded content stream.
 *
 * @return string Concatenated string contents without the parentheses.
 */
function ps2txt($ps_data)
{
    $result = '';

    $a_data = getDataArray($ps_data, '[', ']');
    if ($a_data === []) {
        // the data may just be in raw format (outside of [] tags)
        $a_data = [$ps_data];
    }

    foreach ($a_data as $ps_text) {
        foreach (getDataArray($ps_text, '(', ')') as $text) {
            // strip the surrounding parentheses
            $result .= substr($text, 1, -1);
        }
    }

    return $result;
}

/**
 * Reads a whole file into a string.
 *
 * @param string $filename Path of the file to read.
 *
 * @return string The file contents (empty string for an empty file).
 *
 * @throws RuntimeException When the file is missing or unreadable.
 */
function getFileData($filename)
{
    if (!is_file($filename) || !is_readable($filename)) {
        throw new RuntimeException('Unable to read file: ' . $filename);
    }

    $data = file_get_contents($filename);
    if ($data === false) {
        throw new RuntimeException('Unable to read file: ' . $filename);
    }

    return $data;
}

/**
 * Finds every non-overlapping section of $data that starts with
 * $start_word and ends with the next $end_word.
 *
 * @param string $data       Text to search.
 * @param string $start_word Opening delimiter.
 * @param string $end_word   Closing delimiter.
 *
 * @return array List of matches, each including both delimiters. The list
 *               is empty (never null) when nothing matches or when either
 *               delimiter is an empty string.
 */
function getDataArray($data, $start_word, $end_word)
{
    $a_result = [];
    $start_len = strlen($start_word);
    $end_len = strlen($end_word);

    if ($start_len === 0 || $end_len === 0) {
        return $a_result;
    }

    $offset = 0;
    while (($start = strpos($data, $start_word, $offset)) !== false) {
        // search behind the start word so identical or overlapping
        // delimiters cannot match at the same position
        $end = strpos($data, $end_word, $start + $start_len);
        if ($end === false) {
            break;
        }

        // data is between start and end
        $a_result[] = substr($data, $start, $end - $start + $end_len);

        // continue behind the end word; this guarantees progress
        $offset = $end + $end_len;
    }

    return $a_result;
}
?>
Link to comment https://forums.phpfreaks.com/topic/169903-select-any-pdf-document/ Share on other sites More sharing options...
Lyleyboy Posted August 12, 2009 Share Posted August 12, 2009 The easy way would be to keep all the PDF's in the same folder (and nothing else) then create a form to select the files. Something like the below.
<select name="file">
<?php
// Emits one <option> per entry of the PDF folder so the user can pick the
// file to convert.
$dir = opendir('server/pdf/');
if ($dir !== false) {
    // readdir() must be compared with false explicitly: an entry whose name
    // evaluates to false (for example "0") would otherwise end the loop early.
    while (($read = readdir($dir)) !== false) {
        if ($read !== '.' && $read !== '..') {
            // File names are not trusted input for HTML: escape them so
            // quotes or angle brackets cannot break out of the markup.
            $escaped = htmlspecialchars($read, ENT_QUOTES, 'UTF-8');
            echo "<option value='" . $escaped . "'>" . $escaped . "</option>";
        }
    }
    closedir($dir);
}
?>
</select>
Link to comment https://forums.phpfreaks.com/topic/169903-select-any-pdf-document/#findComment-896357 Share on other sites More sharing options...
Recommended Posts
Archived
This topic is now archived and is closed to further replies.