Jump to content

get content of website


fer0an

Recommended Posts

Hello

I want to get the content of a website and insert it into my own website.

This website has 2000 pages, and each page has 10 posts.

I have written some code, but I have a problem:

for($i=1;$i<=3;$i++)   //$i is website pages
{
//download main page
$maincatst = file_get_contents($catlink.$i);  //$catlink select link of category of website
//number of post in main page
for ($j=1;$j<=10;$j++)
{
$linkposttmp = TextBetween('before link','after link',$maincatst) ;

$link = TextBetween('before link','after link>',$linkposttmp);

// download content of each link 
$main_post_str = file_get_contents($link);

 

My problem is that when I run this file, it only downloads the first link of each main page.

Can anyone help me?

Link to comment
https://forums.phpfreaks.com/topic/215199-get-content-of-website/
Share on other sites

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">



<html xmlns="http://www.w3.org/1999/xhtml">

<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<title>Untitled Document</title>
</head>
<?php
// The scraper downloads many remote pages, so lift PHP's execution time limit.
set_time_limit(0) ;
// Send every echo/print to the client immediately so progress is visible while the script runs.
ob_implicit_flush(true);
// Flush and close the active output buffer, if there is one. Calling
// ob_end_flush() with no active buffer raises a notice, so check first.
if (ob_get_level() > 0) {
    ob_end_flush();
}
/**
 * Return the extension of a file name: everything after its last dot.
 *
 * @param string $file_name File name (or path) to inspect.
 * @return string The text after the last '.', without the dot itself, or an
 *                empty string when the name contains no dot or ends with one.
 */
function get_file_extension($file_name)
{
    // strrchr() yields the tail starting at the last '.', or false if there is none.
    $tail = strrchr($file_name, '.');
    if ($tail === false) {
        return '';
    }
    // Cast so that older PHP versions, where substr() can return false for an
    // empty result, still hand back a string.
    return (string) substr($tail, 1);
}
/**
 * Extract the text located between two delimiters, matching the delimiters
 * case-insensitively.
 *
 * @param string $s1 Opening delimiter; an empty string means "start of $s".
 * @param string $s2 Closing delimiter; an empty string means "until the end of $s".
 * @param string $s  Text to search.
 * @return string The part of $s after the first $s1 and before the next $s2
 *                (original letter case preserved), or '' when either
 *                delimiter cannot be found.
 */
function TextBetween($s1,$s2,$s){
    // Search in lower-cased copies; cut the result out of the untouched $s.
    $open     = strtolower($s1);
    $close    = strtolower($s2);
    $haystack = strtolower($s);
    $openLen  = strlen($open);

    $openAt = ($openLen > 0) ? strpos($haystack, $open) : 0;
    if ($openAt === false) {
        return '';
    }

    // First character after the opening delimiter.
    $from = $openAt + $openLen;
    if ($close == '') {
        return substr($s, $from);
    }

    $closeAt = strpos($haystack, $close, $from);
    if ($closeAt === false) {
        return '';
    }

    return substr($s, $from, $closeAt - $from);
}








// Database settings.
$dbhost = "localhost";
$dbname = "database";
$dbuser = "user";
$dbpass = "pass";

// Open the MySQL connection; stop the script with the server's error message on failure.
// NOTE(review): the mysql_* extension was removed in PHP 7 - this script needs
// PHP 5.x, or a port to mysqli/PDO.
$con = mysql_connect($dbhost, $dbuser, $dbpass);
if (!$con) {
    die('Could not connect: ' . mysql_error());
}

// Select the working database. No link is passed, so the mysql extension
// uses the most recently opened connection.
if (!mysql_select_db($dbname)) {
    die('Could not select database');
}











// Categories to scrape.
// Each entry is a [name]...[/name] tag followed by a [link]...[/link] tag.
// The main loop below pulls the entries out one at a time with TextBetween()
// and appends a page number to the link when it downloads a listing page.



$catstring = '




[name]Arts and Photography[/name][link]http://www.website.com/arts-photography/page/[/link]

[name]Outdoors and Nature[/name][link]http://www.website.com/outdoors-nature/page/[/link]';







    $query_table ="CREATE TABLE `second_content` (



`name` VARCHAR( 255 ) NOT NULL ,



`text` TEXT NOT NULL ,



`dllink` VARCHAR( 500 ) NOT NULL ,



`size` VARCHAR( 20 ) NOT NULL ,



`changed` TINYINT( 1 ) NOT NULL DEFAULT '0' ,



`fileext` VARCHAR( 5 ) NOT NULL ,



`cat` VARCHAR (50) NOT NULL , 

`dblink` VARCHAR( 500 ) NOT NULL 

) ;";



    if( $result_table = mysql_query($query_table) ) {echo "yes1";} else echo "no1"; 



    



    



  while($catlink = TextBetween('[link]','[/link]',$catstring))



   {



    //dar avardane link va cat name  



    $catname = TextBetween('[name]','[/name]',$catstring);



    $catstring = str_replace("[link]".$catlink."[/link]","",$catstring);



    $catstring = str_replace("[name]".$catname."[/name]","",$catstring);



    echo "<b>".$catname."</b><br><b>".$catlink."</b><br>".$catstring."<br>" ;
    
    
     //tedad safahate category
     $i=1;
     while ($i<=4)
     {
     echo "<font color=\"blue\">".$i."</font>"; 


//download e safe asli
$maincatst = file_get_contents($catlink.$i);
//echo $maincatst;
//tedad post dar har safhe

$i++;
{
$linkposttmp = TextBetween('dle-info','footer',$maincatst) ;
//echo $linkposttmp;

$link = TextBetween('shortnews-header"><a href="','">',$linkposttmp);

echo $link."<br>";

// download content url haye safe farei
$main_post_str = file_get_contents($link);



//title
            $title = TextBetween('fullnews-header">','</div>',$main_post_str);

//alias
                  $alias1 = str_replace(" ","-",$title);
          $pieces = explode("-", $alias1);
          $alias = $pieces[0]."-".$pieces[1]."-".$pieces[2]."-".$pieces[3]."-".$pieces[4]."-".$pieces[5];

             
//full text            

                        $text0= TextBetween('<div id="news-id-','"fullnews-info-bottom">',$main_post_str);
                        //echo $text0;
                        $text = TextBetween('<b>','</div>',$text0);
                        $text ="<b>".$text;
//   echo $text;            

    
//images names
$imagename = str_replace(" ","",$title);
$imagename = str_replace("#","_",$imagename);
      $imagename = str_replace("\\","_",$imagename);
      $imagename = str_replace("/","_",$imagename);
//images address
$images1 = textbetween('<div id="news-id-','/></a>',$main_post_str);
$images1 = textbetween('src="','"',$images1);

//save images
$im = imagecreatefromjpeg($images1); // original image

//reduce image quality
imagejpeg($im, "/home/site/public_html/static/images/book/".$imagename.".jpg" , 60); // save to new image, third value is quality (0-100) if not specified its the default (75)
$size = getimagesize($images1);
$dimention = $size[3];

$images = '<img src="http://www.website.com/static/images/book/'.$imagename.'.jpg" '.$dimention.'';
$images = $images.' align="right">';
$images = str_replace ('<img src=""','<img src="http://www.website.com/images/noimage.jpg"',$images);
//echo $images;
//if picture exist
if (@fclose(@fopen( "$images1", "r" ))) {
print("File exists.");
} else {
print("File does not exist.");
} 


//full text

                        $full = '<p align="left">'.$images.'</p></br>'.$text;
                  //      echo $full;


//DL link 
$finddl = textbetween ("<div id='comm-id-",'</noindex>',$main_post_str);
$finddl1 = textbetween ('<noindex><a href="','"',$finddl);
$size = textbetween ('Format','<noindex>',$finddl);
$size = textbetween ('Size:','</b>',$size);

//echo $size."<br/>";
//echo $finddl1."<br/>";


//extension

            if( strpos(finddl1,".zip") ) { $ext = ".zip";} else $ext = ".rar";
            




//pak kardane url ghabli
$maincatst = str_replace('shortnews-header"><a href="'.$linkposttmp.'">', "",$maincatst);


//continue if exist bar hasbe title
$query_se = mysql_query(" select `name` from `second_content` where `name`='$title' " );
if (!$query_se)
  {
  die('Error: exist ' . mysql_error());
  }
$title_numrows = mysql_num_rows($query_se);
$nat_se = mysql_fetch_array($query_se);

$title_tm = $nat_se["name"];

echo "titletm= $title_tm ";

if ($title_numrows>0 )

{ 
echo "$title :found<br>";
continue ;
}
else 
{
echo "<font color='red' >$title :$title_tm found</font><br>";
print_r($nat_se);
}





            //rikhtane info dar db


        
$query_insert = "INSERT INTO `second_content` (`name`, `text`, `dllink`, `size`, `changed`, `fileext`, `cat`, `dblink`)
VALUES ('".$title."', '".$full."', '".$finddl1."', '".$size."', '0', '".$ext."', '".$catname."', '".$dblink."');";

if($result_insert =mysql_query($query_insert)) {echo "yes2".$title."<br>";} else echo "<font color='red' ><b>no2</b></font>".$title;


$query_se = "SELECT * FROM `first_content` WHERE `name`='".$title."' ";



$result_se = mysql_query($query_se);

$title_numrows = mysql_num_rows($result_se);

$nat_se = mysql_fetch_array($result_se);



$title_tm = $nat_se["name"];



echo "titletm= $title_tm ";



if ($title_numrows>0 ) { echo "$title :found<br>"; continue ;}
else
{
echo "<font color='red' >$title :$title_tm found</font>";
print_r($nat_se);
}










if($result_insert =mysql_query($query_insert)) {echo "yes2".$title."<br>";} else echo "no2".$title;



}



}



        }





     echo "<br>salam".$i;



    



     $strp_no = strpos($maincatst,"No articles found" );



    while( $strp_no == FALSE);



    echo "<br>salam<br>";




}



?>

</html>

Archived

This topic is now archived and is closed to further replies.

×
×
  • Create New...

Important Information

We have placed cookies on your device to help make this website better. You can adjust your cookie settings, otherwise we'll assume you're okay to continue.