
cURL rate limits and timeouts: keeping GET requests under a rate limit


Solved by gizmola


So the situation is this: I have a cloud-based webapp that stores a lot of our customer data. We are building an in-house CRM to manage a few things the app doesn't do. I need to pull all the customer info from the subscription webapp via an API call and load it into the clients table of our in-house CRM's MySQL database.

The first step was to do a cURL GET request. The code below seems to be half working: it gets down to clients starting with the letter C and then returns the page, so I'm missing all clients from D to Z. How would I change this code to get all remaining pages from the GET request?

I also need to keep my requests under 180 per second (see the throttling sketch after the code).

<?php


function Get_Customer()
{
    $url = 'https://xxxxxxxxxxxxxxxxxx.xxxxxxmsp.com/api/v1/customers?sort=business_name';
    $cURL = curl_init();
    
    curl_setopt($cURL, CURLOPT_URL, $url);
    curl_setopt($cURL, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($cURL, CURLOPT_HTTPGET, true);
    curl_setopt($cURL, CURLOPT_HTTPHEADER, [
        'Content-Type: application/json',
        'Accept: application/json',
        'Authorization: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'
    ]);
    
    $result = curl_exec($cURL);
    $data = json_decode($result, true);
    $customers = $data['customers'];
    
    // loop through array of customer
    foreach ($customers as $customer) {
        $customer['id'];
        $customer['firstname'];
        $customer['lastname'];
        $customer['fullname'];
        
        $customer['business_name'];
        $customer['email'];
        $customer['phone'];
        $customer['mobile'];
        
        $customer['business_and_full_name'];
        $customer['business_then_name'];
        echo $customer['business_name'];
        echo "<br>";
    }
    
    curl_close($cURL);
}

?>
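One way to meet the rate-limit requirement is simply to space the calls out. This is only a sketch, assuming sequential requests; the helper name and the interval constant are illustrative, not from the thread:

<?php

// Sketch: enforce a minimum gap between requests.
// 180 requests/second allows one every ~5.6 ms; 6 ms leaves a margin.
function rate_limited_get($url, array $headers)
{
    static $lastRequest = 0.0;

    $minInterval = 0.006; // seconds between calls (assumed budget: 180/sec)
    $wait = $minInterval - (microtime(true) - $lastRequest);
    if ($wait > 0) {
        usleep((int)($wait * 1000000));
    }
    $lastRequest = microtime(true);

    $cURL = curl_init();
    curl_setopt($cURL, CURLOPT_URL, $url);
    curl_setopt($cURL, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($cURL, CURLOPT_HTTPHEADER, $headers);
    $result = curl_exec($cURL);
    curl_close($cURL);

    return $result;
}

?>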

 

I don't see anything in here that would magically stop working just because it's partway through processing the letter "C". Whatever your problem is, it's not going to be apparent from reading the code you've posted.

Continuing on with this thread, the solution was this; it returns a complete set of customers from all 6 pages. However...

<?php

function Get_Customer()
{
  set_time_limit(300); 
  $customers = [];
  $totalPages = getTotalPages();

  $page = 1;
  while ($page <= $totalPages) {
    $returnedCustomers = Customers($page);
    $customers = array_merge($customers, $returnedCustomers);
    $page++;
  }
  include('db_con.php');
  // test
  // echo "Total Pages: " . $page . "<br>";
  // echo "Total Customers: " . count($customers) . "<br>";

  // loop through array of customer
  foreach ($customers as $customer) {
    $customer["id"];
    $customer["firstname"];
    $customer["lastname"];
    $customer["fullname"];

    $customer["business_name"];
    $customer["email"];
    $customer["phone"];
    $customer["mobile"];
    $customer["address"];
    $customer["city"];
    $customer["state"];
    $customer["zip"];
    $customer["business_and_full_name"];
    $customer["business_then_name"];
    if ($customer["business_name"] != "")
        {
            $custname = $customer["business_name"];

            //Insert customer data into the customer table with syncroID and Company Name
            $query = "INSERT INTO Customer (SyncroID, CompanyName)
                        VALUES ('" . $customer["id"] . "', '" . $customer["business_name"] ."');";
            echo "<br>";
            echo $query;
            echo "<br>";
           //$stmt = $pdo->query($query);
            usleep(5000);
            
        
        }
        }

  return $customers;
}

function Customers($page = 1)
{
  $url =
    "https://xxxxxxxxxxxxxxxxxxxxxxxxcom/api/v1/customers?sort=business_name&page=" .
    $page;

  $cURL = curl_init();

  curl_setopt($cURL, CURLOPT_URL, $url);
  curl_setopt($cURL, CURLOPT_RETURNTRANSFER, true);
  curl_setopt($cURL, CURLOPT_HTTPGET, true);
  curl_setopt($cURL, CURLOPT_HTTPHEADER, [
    "Content-Type: application/json",
    "Accept: application/json",
    "Authorization: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
  ]);

  $result = curl_exec($cURL);
  $data = json_decode($result, true);
  $customers = $data["customers"];

  curl_close($cURL);
  return $customers;
}

function getTotalPages()
{
  $url = "https://xxxxxxxxxxxxxxxxxsp.com/api/v1/customers";

  $cURL = curl_init();

  curl_setopt($cURL, CURLOPT_URL, $url);
  curl_setopt($cURL, CURLOPT_RETURNTRANSFER, true);
  curl_setopt($cURL, CURLOPT_HTTPGET, true);
  curl_setopt($cURL, CURLOPT_HTTPHEADER, [
    "Content-Type: application/json",
    "Accept: application/json",
    "Authorization: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
  ]);

  $result = curl_exec($cURL);
  $data = json_decode($result, true);
  $totalPages = $data["meta"]["total_pages"];
  curl_close($cURL);
  return $totalPages;
}

// Get_Customer();
// test
/*
 echo "<br>";
 echo $custname;
 echo " ";
 echo $customer["address"];
 echo " ";
 echo $customer["city"];
 echo ",";
 echo $customer["state"];
 echo " ";
 echo  $customer["zip"];
 echo "<b>Customer ID:</b> ";
 echo " ";
 echo $customer["id"];
 echo "<br>";
 echo $query;
 echo "<br>";
 */
//INSERT INTO `Customer` (`SyncroID`, `PrimaryContactID`, `CompanyName`, `Address`, `City`, `State`, `Postcode`, `Country`, `AccountStatus`, `UserName`, `Password_Hash`, `Password_Salt`, `Notes`, `BillingContactID`)
//VALUES ('12321', NULL, 'test', NULL, NULL, 'QLD', NULL, 'Australia', 4, NULL, NULL, NULL, NULL, NULL);

When I change this line of code to insert into my database

 //$stmt = $pdo->query($query);

to

 $stmt = $pdo->query($query);

I only get the first 80 rows before I get a gateway timeout. I tried usleep in the loop and set_time_limit, but they made no difference. Anyone got any ideas? I know the API is returning the full data set, so I assume it's some memory or timeout limit.

Firstly, these lines of code do absolutely nothing...

    $customer["id"];
    $customer["firstname"];
    $customer["lastname"];
    $customer["fullname"];

    $customer["business_name"];
    $customer["email"];
    $customer["phone"];
    $customer["mobile"];
    $customer["address"];
    $customer["city"];
    $customer["state"];
    $customer["zip"];
    $customer["business_and_full_name"];
    $customer["business_then_name"];
    

Secondly, you are using the least efficient means of inserting data that there is. Single inserts are slow. Marginally faster are prepared statements, where you prepare once then execute in a loop with new values for each call. By far the fastest way is to use multi-row inserts.

E.g. INSERT INTO table (id, name) VALUES (1, 'Name A'), (2, 'Name B'), (3, 'Name C');

To implement...

$data = [];
foreach ($customers as $c) {
    // $pdo->quote() escapes the name so values like "O'Brien's" don't break the query
    $data[] = sprintf("(%d, %s)", $c['id'], $pdo->quote($c['business_name']));
}
$pdo->exec("INSERT INTO Customer (SyncroID, CompanyName) VALUES " . join(',', $data));

 

  • Solution

What the heck is this code?

    $customer["id"];
    $customer["firstname"];
    $customer["lastname"];
    $customer["fullname"];

    $customer["business_name"];
    $customer["email"];
    $customer["phone"];
    $customer["mobile"];
    $customer["address"];
    $customer["city"];
    $customer["state"];
    $customer["zip"];
    $customer["business_and_full_name"];
    $customer["business_then_name"];

 

I have no idea why you would think that adding 5ms of sleep would help anything.

Your PDO code should be using a prepared statement.  

Then you simply execute the statement in a loop, passing the data each time.

It is well known that MySQL inserts are much faster if you do multiples:

INSERT INTO Table (col1, col2) VALUES (?, ?), (?, ?), (?, ?)....

This is one place where PDO doesn't have a built-in solution, so you have to build your own, as in the example Barand helpfully provided. It is possible to hit limits on the size of a query, but that would require a lot of data. Changing that limit depends on which MySQL library you are using, so I'll leave that to you to research. For example, if you are using mysqlnd, a PDO runtime parameter changing that size is ignored.

A good hybrid option would be to create a batch system: load an array of values inside the outer foreach, build the statement from that array, then prepare it and pass the array to execute.
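As a sketch of that batch approach (the chunk size is an assumption; $pdo and $customers are as in the code elsewhere in this thread):

$batchSize = 500; // rows per multi-row INSERT (assumed; tune as needed)
foreach (array_chunk($customers, $batchSize) as $chunk) {
    $placeholders = [];
    $params = [];
    foreach ($chunk as $c) {
        if (empty($c['business_name'])) {
            continue;
        }
        $placeholders[] = '(?, ?)';
        $params[] = $c['id'];
        $params[] = $c['business_name'];
    }
    if ($placeholders) {
        // one prepared statement per chunk, with (?, ?) repeated per row
        $stmt = $pdo->prepare('INSERT INTO Customer (SyncroID, CompanyName) VALUES '
            . implode(',', $placeholders));
        $stmt->execute($params);
    }
}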

However, the first stab I would take would be to simply do a prepare-and-execute loop with a single transaction.

$query = "INSERT INTO Customer (SyncroID, CompanyName) VALUES (?, ?)";
    
try {
    $pdo->beginTransaction();
    $stmt = $pdo->prepare($query);
    foreach ($customers as $customer)
    {
        if (!empty($customer['business_name'])) {
		    $stmt->execute([$customer['id'], $customer['business_name']]);
        }
    }
    $pdo->commit();
}catch (\Throwable $e){
    $pdo->rollback();
    throw $e;
}

To do this type of thing effectively you need to make sure that the PDO connection has $conn->setAttribute(PDO::ATTR_EMULATE_PREPARES, false) and $conn->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION) set.
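As a sketch, the connection setup might look like this (the DSN and credentials are placeholders, not from the thread):

<?php

// db_con.php sketch — placeholder DSN/credentials, attributes as described above
$pdo = new PDO(
    'mysql:host=localhost;dbname=crm;charset=utf8mb4', // placeholder DSN
    'dbuser', // placeholder user
    'dbpass', // placeholder password
    [
        PDO::ATTR_EMULATE_PREPARES => false,                  // real server-side prepares
        PDO::ATTR_ERRMODE          => PDO::ERRMODE_EXCEPTION, // throw so the catch/rollback fires
    ]
);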

Thanks, I'll give that a go.

In reply to gizmola: the data below is additional data that I'll need to insert later, after I can successfully insert just the ID and business name. I know that I'm not currently using it, but I hope to add it in later; I'll just comment it out for now.

    $customer["id"];
    $customer["firstname"];
    $customer["lastname"];
    $customer["fullname"];

    $customer["business_name"];
    $customer["email"];
    $customer["phone"];
    $customer["mobile"];
    $customer["address"];
    $customer["city"];
    $customer["state"];
    $customer["zip"];
    $customer["business_and_full_name"];
    $customer["business_then_name"];

 

If this is your insert query...

INSERT INTO `Customer` (`SyncroID`, `PrimaryContactID`, `CompanyName`, `Address`, `City`, `State`, `Postcode`, `Country`, `AccountStatus`, `UserName`, `Password_Hash`, `Password_Salt`, `Notes`, `BillingContactID`)
VALUES ('12321', NULL, 'test', NULL, NULL, 'QLD', NULL, 'Australia', 4, NULL, NULL, NULL, NULL, NULL);

...there is only a 50% correlation between those column names and the data keys in the customer array...

+-------------------------------------+--------------------------------------+
|             Matched                 |             Unmatched                |
+-------------------------------------+--------------------------------------+
| $customer["id"]                     | $customer["firstname"]               |
| $customer["business_name"]          | $customer["lastname"]                |
| $customer["address"]                | $customer["fullname"]                |
| $customer["city"]                   | $customer["email"]                   |
| $customer["state"]                  | $customer["phone"]                   |
| $customer["zip"]                    | $customer["mobile"]                  |
|                                     | $customer["business_and_full_name"]  |
|                                     | $customer["business_then_name"]      |
+-------------------------------------+--------------------------------------+
 

You should use a data-driven design, with an array that maps each data field to its database column, then dynamically build the SQL query statement and fetch the data values by looping over the defining array. This will prevent typos and mismatched entries, and lets you change what the code does simply by changing the defining array (see the sketch below).
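A minimal sketch of that idea, using only the fields that clearly map to columns in the INSERT shown above ($pdo and $customers as elsewhere in the thread):

// defining array: API field => database column
$map = [
    'id'            => 'SyncroID',
    'business_name' => 'CompanyName',
    'address'       => 'Address',
    'city'          => 'City',
    'state'         => 'State',
    'zip'           => 'Postcode',
];

// build the query once from the defining array
$columns      = implode(', ', $map);
$placeholders = implode(', ', array_fill(0, count($map), '?'));
$stmt = $pdo->prepare("INSERT INTO Customer ($columns) VALUES ($placeholders)");

foreach ($customers as $customer) {
    // pull the values in the same order as the mapped columns
    $values = [];
    foreach (array_keys($map) as $field) {
        $values[] = $customer[$field] ?? null;
    }
    $stmt->execute($values);
}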

4 hours ago, portabletelly said:

I only get the first 80 rows before I get a gateway time out. I tried usleep in for and set_time_limit but they made no difference.  Anyone got any ideas.

Typically mass imports are something that wouldn't be done via a web request.   The task would be offloaded to a background task (through a cron job or task queue system) where you don't have to worry as much about timeouts.

For example, for simple things I've typically inserted an import job into a table in the database with whatever information is needed for the import process.  Then a second script run via a cron job on some interval checks that table for new import tasks and if one is found, performs the actual import task.  This way the user isn't stuck with a loading page for however long the import takes, and the import task isn't subject to any timeouts other than PHP's max_execution_time settings (which is unlimited by default for CLI scripts).
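A bare-bones sketch of that pattern (the import_jobs table and its columns are made up for illustration; assumes the thread's functions and db_con.php are available):

// cron_worker.php — run from cron, e.g. every minute: php /path/to/cron_worker.php
include('db_con.php');

// claim one pending import job
$job = $pdo->query("SELECT id FROM import_jobs WHERE status = 'pending' LIMIT 1")->fetch();

if ($job) {
    $update = $pdo->prepare("UPDATE import_jobs SET status = ? WHERE id = ?");
    $update->execute(['running', $job['id']]);

    try {
        Get_Customer(); // the long-running import; no gateway timeout on the CLI
        $update->execute(['done', $job['id']]);
    } catch (\Throwable $e) {
        $update->execute(['failed', $job['id']]);
    }
}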

Just circling back to this. Finally got it working with gizmola's code, thank you. This was the end result. Now I have to rewrite this code to add all the other fields in. Then, once that is successful, if the ID already exists in MySQL, I'll only update the row if something is different. I don't fully understand why this is faster and works, though. Is it because it prepares the query outside of the loop rather than inside, and that's why it doesn't time out?

<?php

function Get_Customer()
{
  set_time_limit(300); 
  $customers = [];
  $totalPages = getTotalPages();

  $page = 1;
  while ($page <= $totalPages) {
    $returnedCustomers = Customers($page);
    $customers = array_merge($customers, $returnedCustomers);
    $page++;
  }
  include('db_con.php');

  //************************ start of gizmola's code ************************
  $query = "INSERT INTO Customer (SyncroID, CompanyName) VALUES (?, ?)";

  try {
      $pdo->beginTransaction();
      $stmt = $pdo->prepare($query);
      foreach ($customers as $customer) {
          if (!empty($customer['business_name'])) {
              $stmt->execute([$customer['id'], $customer['business_name']]);
          }
      }
      $pdo->commit();
  } catch (\Throwable $e) {
      $pdo->rollBack();
      throw $e;
  }
  //************************* end of gizmola's code *************************
     

  return $customers;
}

function Customers($page = 1)
{
  $url =
    "https://xxxxxxxxxxxxxxxxxxxxxxxxxxmsp.com/api/v1/customers?sort=business_name&page=" .
    $page;

  $cURL = curl_init();

  curl_setopt($cURL, CURLOPT_URL, $url);
  curl_setopt($cURL, CURLOPT_RETURNTRANSFER, true);
  curl_setopt($cURL, CURLOPT_HTTPGET, true);
  curl_setopt($cURL, CURLOPT_HTTPHEADER, [
    "Content-Type: application/json",
    "Accept: application/json",
    "Authorization: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
  ]);

  $result = curl_exec($cURL);
  $data = json_decode($result, true);
  $customers = $data["customers"];

  curl_close($cURL);
  return $customers;
}

function getTotalPages()
{
  $url = "https://xxxxxxxxxxxxxxxmsp.com/api/v1/customers";

  $cURL = curl_init();

  curl_setopt($cURL, CURLOPT_URL, $url);
  curl_setopt($cURL, CURLOPT_RETURNTRANSFER, true);
  curl_setopt($cURL, CURLOPT_HTTPGET, true);
  curl_setopt($cURL, CURLOPT_HTTPHEADER, [
    "Content-Type: application/json",
    "Accept: application/json",
    "Authorization: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
  ]);

  $result = curl_exec($cURL);
  $data = json_decode($result, true);
  $totalPages = $data["meta"]["total_pages"];
  curl_close($cURL);
  return $totalPages;
}

 
