Jump to content

Get stream from PHP cURL


NotionCommotion

Recommended Posts

14 minutes ago, kicken said:

Are you trying to get the cURL resource so you can call various curl_* functions using it?

If a response is large, json-machine is used to decode the string instead of json_decode().  Based on the application's configuration, $this->stream can either be set using Guzzle or just PHP's cURL library.  I've got the Guzzle version working, but not the cURL version.

    private function getParsedResults()
    {
        if(is_null($this->parsedResults)) {
            if(fstat($this->stream)['size'] > self::MAX_JSON_DECODE) {
                $this->parsedResults=[];
                foreach (JsonMachine::fromStream($this->stream) as $key => $value) {
                    $this->parsedResults[$key] = $value;
                }
            }
            else {
                $this->parsedResults=json_decode(stream_get_contents($this->stream), true);
                if (json_last_error() !== JSON_ERROR_NONE) {
                    throw new \InvalidArgumentException('Invalid JSON');
                }
            }
            $this->validate($this->parsedResults);
        }

        return $this->parsedResults;
    }

 

Link to comment
Share on other sites

25 minutes ago, requinix said:

cURL doesn't expose its streams.

JsonMachine accepts an iterator. Use CURLOPT_WRITEFUNCTION to receive some number of bytes from cURL, feed that into an iterator, and give the iterator to JsonMachine.

Thanks

Thanks,  I will change paths and not try to force cURL to expose a stream.

<?php

/**
 * cURL-backed driver that hands the response body back as a "curl://" stream.
 */
class Curl implements DriverInterface, QueryDriverInterface
{
    /**
     * Registers the "curl" stream wrapper (backed by the CurlStream class).
     *
     * @param mixed $dsn     Not used by the visible code; presumably stored as
     *                       $this->dsn in the elided part — TODO confirm.
     * @param array $options Not used by the visible code.
     *
     * @throws Exception If the wrapper cannot be registered.
     */
    public function __construct($dsn, $options = [])
    {
        //...
        // Registering a scheme twice fails, so a second driver instance must
        // reuse the existing registration instead of aborting.
        if (!in_array('curl', stream_get_wrappers(), true)
            && !stream_wrapper_register('curl', 'CurlStream')
        ) {
            throw new Exception('Failed to register protocol wrapper');
        }
    }

    /**
     * Perform a request against $this->dsn . '/' . $url and stream the body
     * into a freshly opened curl:// stream.
     *
     * Caller-supplied options are applied first; the URL, header, buffer-size
     * and output-stream options set afterwards take precedence over them.
     * $this->lastRequestInfo is reset to null up front and receives
     * curl_getinfo() once the transfer has run.
     *
     * @param string $url         Path appended to the DSN.
     * @param array  $curlOptions Map of CURLOPT_* constant => value.
     *
     * @return resource The curl:// stream that cURL wrote the response to.
     *
     * @throws Exception If the stream cannot be opened or the transfer fails.
     */
    protected function execute($url, $curlOptions = [])
    {
        $this->lastRequestInfo = null;

        $stream = fopen("curl://CurlStream", "r+");
        if ($stream === false) {
            throw new Exception('Request failed! Unable to open curl:// stream');
        }

        $ch = curl_init();

        try {
            foreach ($curlOptions as $option => $value) {
                curl_setopt($ch, $option, $value);
            }

            curl_setopt($ch, CURLOPT_URL, $this->dsn . '/' . $url);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
            curl_setopt($ch, CURLOPT_HEADER, 0);
            curl_setopt($ch, CURLOPT_BUFFERSIZE, 256);
            curl_setopt($ch, CURLOPT_FILE, $stream);

            $result = curl_exec($ch);

            $this->lastRequestInfo = curl_getinfo($ch);

            // curl_exec() returns false on total failure (socket/port closed
            // etc.). Rely on that rather than on the size of the output stream.
            if ($result === false) {
                $errno = curl_errno($ch);
                fclose($stream);
                throw new Exception('Request failed! curl_errno: ' . $errno);
            }
        } finally {
            // Release the handle on both the success and the failure path.
            curl_close($ch);
        }

        return $stream;
    }

}


 

/**
 * In-memory stream wrapper used as a write target for cURL.
 *
 * Everything written to the handle is appended to an internal buffer, and
 * reads are served from that same buffer through an independent read cursor.
 * Writes never move the read cursor, so data written first can afterwards be
 * read from offset 0 without an explicit rewind.
 *
 * References:
 *  - https://stackoverflow.com/questions/1342583/manipulate-a-string-that-is-30-million-characters-long/1342760#1342760
 *  - https://www.php.net/manual/en/stream.streamwrapper.example-1.php
 */
class CurlStream //implements SomeInterface?
{
    /** @var resource|null Stream context; assigned by PHP when it instantiates the wrapper. */
    public $context;

    /** @var string All bytes written to this handle so far. */
    private $buffer = '';

    /** @var int Offset into $buffer of the next byte stream_read() will return. */
    private $position = 0;

    /**
     * Open a handle with an empty buffer and the read cursor at 0.
     *
     * @param string      $path        e.g. "curl://CurlStream" (not interpreted).
     * @param string      $mode        e.g. "r+" (not interpreted).
     * @param int         $options     Flags passed by the streams API (ignored).
     * @param string|null $opened_path Left untouched.
     *
     * @return bool Always true.
     */
    public function stream_open($path, $mode, $options, &$opened_path)
    {
        $this->buffer = '';
        $this->position = 0;
        return true;
    }

    /**
     * Append a chunk to the buffer.
     *
     * @param string $data Bytes to store.
     *
     * @return int Number of bytes accepted (always the full chunk).
     */
    public function stream_write($data)
    {
        $this->buffer .= $data;

        return strlen($data);
    }

    /**
     * Minimal stat information for fstat().
     *
     * @return array Contains only 'size': the number of bytes buffered.
     */
    public function stream_stat()
    {
        return ['size' => strlen($this->buffer)];
    }

    /**
     * Direct accessor for the whole buffer, independent of the read cursor.
     *
     * @return string
     */
    public function stream_get_contents()
    {
        return $this->buffer;
    }

    /**
     * Return up to $count bytes from the read cursor and advance it.
     *
     * @param int $count Maximum number of bytes to return.
     *
     * @return string Empty string once the cursor has reached the end.
     */
    public function stream_read($count)
    {
        // The cast covers PHP versions where substr() yields false at the end.
        $ret = (string) substr($this->buffer, $this->position, $count);
        $this->position += strlen($ret);
        return $ret;
    }

    /**
     * Whether the read cursor has consumed everything written so far.
     *
     * @return bool
     */
    public function stream_eof()
    {
        return $this->position >= strlen($this->buffer);
    }

    /**
     * Current offset of the read cursor.
     *
     * @return int
     */
    public function stream_tell()
    {
        return $this->position;
    }

    /**
     * Move the read cursor.
     *
     * The target must lie within [0, buffer length]; seeking exactly to the
     * end is allowed, anything outside that range is rejected.
     *
     * @param int $offset Offset relative to $whence.
     * @param int $whence SEEK_SET, SEEK_CUR or SEEK_END.
     *
     * @return bool True if the cursor was moved, false otherwise.
     */
    public function stream_seek($offset, $whence)
    {
        $length = strlen($this->buffer);

        switch ($whence) {
            case SEEK_SET:
                $target = $offset;
                break;

            case SEEK_CUR:
                $target = $this->position + $offset;
                break;

            case SEEK_END:
                $target = $length + $offset;
                break;

            default:
                return false;
        }

        if ($target < 0 || $target > $length) {
            return false;
        }

        $this->position = $target;
        return true;
    }

    /**
     * Handle metadata operations such as touch() on a curl:// path.
     *
     * NOTE(review): carried over unchanged from the PHP manual's
     * variable-stream example. For STREAM_META_TOUCH it creates an empty
     * global named after the URL host; nothing else in this class reads it.
     *
     * @param string $path   The curl:// path being operated on.
     * @param int    $option One of the STREAM_META_* constants.
     * @param mixed  $var    Option-specific argument (unused).
     *
     * @return bool True only for STREAM_META_TOUCH.
     */
    public function stream_metadata($path, $option, $var)
    {
        if($option == STREAM_META_TOUCH) {
            $url = parse_url($path);
            $varname = $url["host"];
            if(!isset($GLOBALS[$varname])) {
                $GLOBALS[$varname] = '';
            }
            return true;
        }
        return false;
    }
}

 

Link to comment
Share on other sites

Are you thinking of something like the following?

// Usage sketch: fetch a URL through Curl::query() and dump every decoded value.
$obj = new Curl();
// query() returns the object that collected the response body chunks.
$iterator=$obj->query('bla');
// JsonMachine receives that chunk iterator and is itself iterated as key => value pairs.
$jsonMachine=new JsonMachine($iterator);
foreach ($jsonMachine as $key => $value) {
    var_dump($value);
}
/**
 * Minimal cURL client that collects a response body into a CurlBytes iterator.
 */
class Curl
{
    /**
     * Fetch $url and return the body as an iterable of byte chunks.
     *
     * cURL delivers the body piecewise through CURLOPT_WRITEFUNCTION; every
     * piece is appended to a CurlBytes instance, which is returned once the
     * transfer has finished.
     *
     * @param string $url Absolute URL to request.
     *
     * @return CurlBytes The collected response body.
     *
     * @throws \RuntimeException If the handle cannot be created or the transfer fails.
     */
    public function query(string $url)
    {
        $iterator = new CurlBytes();
        $ch = curl_init();
        if ($ch === false) {
            throw new \RuntimeException('Unable to initialise a cURL handle');
        }

        try {
            curl_setopt($ch, CURLOPT_URL, $url);
            curl_setopt($ch, CURLOPT_HEADER, 0);
            curl_setopt($ch, CURLOPT_WRITEFUNCTION, static function ($ch, string $str) use ($iterator) {
                $iterator->append($str);
                // Returning the chunk length tells cURL the whole chunk was consumed.
                return strlen($str);
            });

            // A failed transfer must not be passed off as an empty or partial body.
            if (curl_exec($ch) === false) {
                throw new \RuntimeException(sprintf(
                    'cURL request to %s failed (%d): %s',
                    $url,
                    curl_errno($ch),
                    curl_error($ch)
                ));
            }
        } finally {
            curl_close($ch);
        }

        return $iterator;
    }
}

 

/**
 * Accumulates raw bytes and replays them as fixed-size chunks.
 *
 * Bytes are collected with append(); iterating the object yields the collected
 * data in pieces of at most $chunkSize bytes.
 */
class CurlBytes implements \IteratorAggregate
{
    /** @var string Bytes collected so far. */
    private $string = '';

    /** @var int Maximum size in bytes of each yielded chunk (always >= 1). */
    private $chunkSize;

    /**
     * @param int $chunkSize Maximum number of bytes per yielded chunk.
     *
     * @throws \InvalidArgumentException If $chunkSize is smaller than 1; such a
     *                                   value would keep getIterator() from ever
     *                                   advancing through the data.
     */
    public function __construct($chunkSize = 1024 * 8)
    {
        $chunkSize = (int) $chunkSize;
        if ($chunkSize < 1) {
            throw new \InvalidArgumentException('Chunk size must be a positive integer');
        }

        $this->chunkSize = $chunkSize;
    }

    /**
     * Add bytes to the end of the collected data.
     *
     * @param string $string Bytes to append.
     */
    public function append(string $string)
    {
        $this->string .= $string;
    }

    /**
     * Yield the collected data in order, $chunkSize bytes at a time.
     *
     * The final chunk may be shorter; nothing is yielded when no data has
     * been appended.
     *
     * @return \Traversable Generator producing string chunks.
     */
    public function getIterator(): \Traversable
    {
        $len = strlen($this->string);
        for ($i = 0; $i < $len; $i += $this->chunkSize) {
            yield substr($this->string, $i, $this->chunkSize);
        }
    }
}

 

Link to comment
Share on other sites

What I was originally thinking turned out to not be possible (I was thinking of two concurrent iterators), so my answer is basically "whatever works" now.

So, as long as you're not approaching your memory limit, I would stick all the string blobs from cURL into an array or ArrayObject, wait until cURL completes, then give that to JsonMachine. That's almost what you're doing now, except you're doing string concatenation and that can potentially hurt performance by a lot because every time you append a string PHP has to reallocate a new one in memory to hold everything and then copy the old value into it. An array/ArrayObject uses slightly more memory because of the overhead but doesn't come with the costly memory management.

Link to comment
Share on other sites

Looks like Guzzle's approach is to either use CURLOPT_WRITEFUNCTION to write to a stream or CURLOPT_FILE to write to a file.

        // Decide where the response body goes: a caller-supplied 'sink' or a temp stream.
        if (isset($options['sink'])) {
            $sink = $options['sink'];
            if (!is_string($sink)) {
                // Any non-string sink is wrapped via stream_for().
                $sink = \GuzzleHttp\Psr7\stream_for($sink);
            } elseif (!is_dir(dirname($sink))) {
                // Ensure that the directory exists before failing in curl.
                throw new \RuntimeException(sprintf(
                    'Directory %s does not exist for sink value of %s',
                    dirname($sink),
                    $sink
                ));
            } else {
                // A string sink whose parent directory exists is treated as a path, opened in 'w+' mode.
                $sink = new LazyOpenStream($sink, 'w+');
            }
            $easy->sink = $sink;
            // Forward every chunk cURL delivers to the sink; the callback returns whatever write() returns.
            $conf[CURLOPT_WRITEFUNCTION] = function ($ch, $write) use ($sink) {
                return $sink->write($write);
            };
        } else {
            // Use a default temp stream if no sink was set.
            $conf[CURLOPT_FILE] = fopen('php://temp', 'w+');
            // The same temp handle is also exposed as the sink, wrapped via stream_for().
            $easy->sink = Psr7\stream_for($conf[CURLOPT_FILE]);
        }

 

Link to comment
Share on other sites

This thread is more than a year old. Please don't revive it unless you have something important to add.

Join the conversation

You can post now and register later. If you have an account, sign in now to post with your account.

Guest
Reply to this topic...

×   Pasted as rich text.   Restore formatting

  Only 75 emoji are allowed.

×   Your link has been automatically embedded.   Display as a link instead

×   Your previous content has been restored.   Clear editor

×   You cannot paste images directly. Upload or insert images from URL.

×
×
  • Create New...

Important Information

We have placed cookies on your device to help make this website better. You can adjust your cookie settings, otherwise we'll assume you're okay to continue.