Jump to content

Help Adding A Small Feature To My PHP Script


kinitex

Recommended Posts

I need help adding a feature to the following php script that goes through folders and reads from txt files.

 

Right now it is just grabbing the sub folder name, title and body and exporting it to a csv in columns A B and C respectively.

 

What I need it to do is grab a summary from each txt file as well and add it to the 4th column in the csv.

 

I think the best way to do this would be to grab everything from the beginning of the body up to a pre-defined closing }. So if I set it to 25, the summary will end at the 25th } found in the txt file, counting from the beginning of the body.

 

All the txt is in spintax format like "The {Fox|Bird|Cat} {Stole|Took} The {Food|Water}"

 

<?php 
// Step 2 entry point: read the fields posted by the step 1 form and validate
// the base folder before any HTML is sent.
set_time_limit(0); // set unlimited execution time 

// Fall back to safe defaults when a field is missing or is not a plain string,
// so requesting this page directly does not raise "undefined index" notices.
// NOTE(review): base_folder is taken verbatim from the request, so any
// directory readable by the web server can be scanned - confirm this script is
// only reachable by trusted users.
$base_folder = (isset($_POST['base_folder']) && is_string($_POST['base_folder']))
    ? $_POST['base_folder']
    : '';

// Maximum number of articles to take from each sub folder; 0 means no limit.
$article_to_capture = (isset($_POST['article_to_capture']) && is_scalar($_POST['article_to_capture']))
    ? (int)$_POST['article_to_capture']
    : 0;

// Comma-separated list of strings that are stripped from every article body.
// An empty field gives an empty list instead of array('').
$words = (isset($_POST['words']) && is_string($_POST['words']) && $_POST['words'] !== '')
    ? explode(',', $_POST['words'])
    : array();

if(!is_dir($base_folder)){
    die('Invalid base folder. Please go <a href="step1.php"><strong>back</strong></a> and enter correct folder.');
}

?><!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<title>Article Scraper Step 2</title>
<style type="text/css">
<!--
body {
font-family: Verdana, Arial, Helvetica, sans-serif;
font-size: 12px;
color: #333333;
}
-->
</style>
</head>

<body>
<h2>Step 2 : Processing the content of the folder. </h2>
<table width="100%" border="0" cellpadding="2" cellspacing="1" bgcolor="#CCCCCC">
  <tr bgcolor="#FFFFFF">
    <th width="10%"> </th>
    <th width="30%">BASE FOLDER NAME</th>
    <th width="50%"> <?php /* value comes from POST: escape it, using the charset declared in the meta tag */ echo htmlspecialchars($base_folder, ENT_QUOTES, 'ISO-8859-1');?></th>
    <th width="10%"> </th>
  </tr>
<?php 
// Walk every sub folder of $base_folder, merge its qualifying .txt articles
// into spintax groups and write one CSV row per sub folder:
//   column A = sub folder name
//   column B = {title 1|title 2|...}   (first line of each file)
//   column C = {body 1|body 2|...}     (remaining lines, joined without separator)
// An HTML progress row is printed for every folder and every processed file.
//
// Reads $base_folder, $article_to_capture (0 = no limit) and $words (strings
// removed from each body line), which are set earlier in the script.

// Only .txt files whose size in bytes lies within these bounds are processed.
$min_file_size = 4000;
$max_file_size = 35000;

$subfolder_arr = scandir($base_folder);
if($subfolder_arr === false){
  die('Unable to read base folder.');
}

$sub_folder_count = 0;
$file_count = 0;
$report = "";

$fp = fopen('articles.csv', 'w+');
if($fp === false){
  die('Unable to open articles.csv for writing.');
}

foreach($subfolder_arr as $file_name){

  if($file_name=='.'||$file_name=='..')
    continue;

  // DIRECTORY_SEPARATOR is "\" on Windows, so paths are unchanged there,
  // while the script now also works on other platforms.
  $sub_folder_name = $base_folder . DIRECTORY_SEPARATOR . $file_name;

  // Plain files lying directly in the base folder are ignored.
  if(!is_dir($sub_folder_name))
    continue;

  $sub_folder_count++;
  $msg = "Processing folder $sub_folder_count $sub_folder_name \r\n";
  $report .= $msg;
?>  
  <tr bgcolor="#FFFFFF"><td> </td><td colspan="2">
<?php echo htmlspecialchars($msg, ENT_QUOTES, 'ISO-8859-1');?>
</td><td> </td></tr>
<tr bgcolor="#FFFFFF"><td> </td><td colspan="2">
<table width="90%" cellpadding="0" cellspacing="0" border="1" bordercolorlight="#0000FF">
<?php
  // process sub folder
  $column1 = $file_name;
  $titles = array();
  $bodies = array();
  $article_processed = 0;   // articles taken from the current sub folder

  $files_arr = scandir($sub_folder_name);
  if($files_arr === false)
    $files_arr = array();   // unreadable sub folder: emit an empty row for it

  foreach($files_arr as $val){
    $file_path = $sub_folder_name . DIRECTORY_SEPARATOR . $val;

    if(!is_file($file_path) || substr($val,-4) !== '.txt')
      continue;

    $size = filesize($file_path);
    if($size === false || $size < $min_file_size || $size > $max_file_size)
      continue;

    // Per-folder limit reached: no later file can be accepted either.
    if($article_to_capture != 0 && $article_processed >= $article_to_capture)
      break;

    // read file, get title and body
    $file_content = file($file_path);
    if($file_content === false || count($file_content) == 0)
      continue;

    $article_processed++;

    // First line is the title, everything after it is the body.
    $file_title = rtrim(array_shift($file_content));

    // Trailing whitespace/newlines are stripped per line and the unwanted
    // words are removed per line, before the lines are glued together.
    $body_lines = array_map('rtrim', $file_content);
    if(count($words) > 0)
      $body_lines = str_replace($words, "", $body_lines);

    $titles[] = $file_title;
    $bodies[] = implode('', $body_lines);
?>	
<tr><td>  			
<?php
    echo htmlspecialchars($val, ENT_QUOTES, 'ISO-8859-1') ."\r\n";
    echo round(($size / 1024), 2).' KB';
?>  
</td></tr>			
<?php
  } // end foreach file
?>	
</table>
</td><td> </td></tr>
<?php
  // Spintax groups: {a|b|c}; an empty folder yields {}.
  $column2 = '{' . implode('|', $titles) . '}';
  $column3 = '{' . implode('|', $bodies) . '}';

  // write to csv / excel file
  if(fputcsv($fp, array($column1,$column2,$column3)) === false){
    fclose($fp);
    die('Unable to write to articles.csv.');
  }
}  // end foreach sub folder

fclose($fp);

// Closing row with the download link; the middle cell spans two columns so the
// row lines up with the four-column layout used by the rows above.
?>  <tr bgcolor="#FFFFFF">
    <td> </td>
    <td colspan="2"> File Generated. 
Download it <a href="articles.csv" target="_blank">HERE</a></td>
    <td> </td>
  </tr>
</table>
</body>
</html>

  • 2 weeks later...

Archived

This topic is now archived and is closed to further replies.

×
×
  • Create New...

Important Information

We have placed cookies on your device to help make this website better. You can adjust your cookie settings, otherwise we'll assume you're okay to continue.