Jump to content

Curl not getting data right.


Ninjakreborn

Recommended Posts

I am trying to go to http://lirr42.mta.info/

and get the data from the submitted form. I have rebuilt the form exactly as needed, and made a few adjustments.  The POST fields that I am sending from MY form are the same as the ones on the site.  This site allows public data mining, by the way.  Does anyone see something wrong with my code?

 

I know the basics are working. I have another page that is working fine. It's just that this one returns only part of the data, and something is wrong with what it does return.

 

Any advice is appreciated.

<?php
// Master switch for development diagnostics.
define( 'DEBUG', true );

// Stop name that is pre-selected in the station dropdowns.
define( 'DEFAULT_STOP', 'Broadway' );

// Surface every error while debugging; report nothing otherwise.
error_reporting( DEBUG ? E_ALL : 0 );

include( 'simplehtmldom/simple_html_dom.php' );

// URL the schedule request is posted to.
$mta_post_url = 'http://lirr42.mta.info/schedules.php';

// GTFS Specification File Definitions: each table is stored as "<table>.txt".
// The first six tables (agency .. calendar) are the required ones; the rest are optional.
$gtfs_files = array();
foreach ( array(
	'agency', 'stops', 'routes', 'trips', 'stop_times', 'calendar',
	'calendar_dates', 'fare_rules', 'fare_attributes', 'shapes', 'frequencies', 'transfers'
	) as $gtfs_table_name )
	{
	$gtfs_files[ $gtfs_table_name ] = $gtfs_table_name . '.txt';
	}
unset( $gtfs_table_name );  // keep the loop variable out of the page's shared scope

// Pickup codes; the list position (0-3) is the code value.
$gtfs_pickup_codes = array(
	'Regularly scheduled pickup',
	'No pickup available',
	'Must phone agency to arrange pickup',
	'Must coordinate with driver to arrange pickup'
);

// Drop off codes; the list position (0-3) is the code value.
$gtfs_dropoff_codes = array(
	'Regularly scheduled drop off',
	'No drop off available',
	'Must phone agency to arrange drop off',
	'Must coordinate with driver to arrange drop off'
);

// Load the stops file and build the <option> list used by both station dropdowns.

// Defined up front so later code can rely on them even when the file cannot be opened or is empty.
$stopsData         = array();  // column 1 of each row (shown as the label) => column 0 (used as the option value)
$stopSelectOptions = '';       // will hold HTML output for Select dropdown for stops

$stopsFile = fopen( 'data/lir/' . $gtfs_files[ 'stops' ], "r" );

if ( $stopsFile )
{
// get (and toss) header row
$stopsHeader = fgetcsv( $stopsFile, 1000 );

// print_r( $stopsHeader );

// build array from file data
while ( $data = fgetcsv( $stopsFile, 1000 ) )
	{
	// a blank line comes back as array( null ); skip anything without both columns
	if ( ! isset( $data[0], $data[1] ) )
		continue;

	$stopsData[ $data[1] ] = $data[0]; 
	}

// close here, inside the guard: the handle only exists when fopen() succeeded
fclose( $stopsFile );

// get a sorted array of the stop names ( yes this could be done with array_multisort or usort but I didn't feel like it right now )
$stopNames = array_keys( $stopsData );
sort( $stopNames );

// loop through sorted array and create SELECT options with default SELECTED at DEFAULT_STOP
// NOTE(review): %d turns a non-numeric stop id into 0 - confirm the ids in this feed are numeric
foreach( $stopNames as $stop )
	{
	$stopSelectOptions .= sprintf(
		'<option %s value="%d">%s</option>',
		( DEFAULT_STOP == $stop ) ? 'selected="selected"' : '',
		$stopsData[ $stop ],
		htmlspecialchars( (string) $stop )  // names containing & or < must not break the markup
		) . "\r\n";                         // was "\n\r", i.e. a reversed CRLF
	}
}


// Load the ads file: one CSV row per ad, indexed by the value in the row's first column.
// NOTE(review): the first column is presumably a stop identifier ("ads available for stops") - confirm against print_ad().

// will hold ads available for stops; defined before the open attempt so it is
// always an array, even when ads.txt is missing
$ads = array();

$adsFile = fopen( 'ads.txt', "r" );

if ( $adsFile )
{
// get (and toss) header row
$adsHeader = fgetcsv( $adsFile, 1000 );

// build array from file data
while ( $data = fgetcsv( $adsFile, 1000 ) )
	{
	// blank or truncated lines do not have all four columns; skip them
	// instead of raising undefined-offset notices
	if ( count( $data ) < 4 )
		continue;

	$ads[ $data[0] ] = array
		(
		'filename' => $data[1],
		'url'      => $data[2],
		'text'     => $data[3]
		);
	}
fclose( $adsFile );
}


// printout the html header
require_once( 'header.php' );	


// Handle a submitted schedule request: forward the posted fields to the MTA schedule page
// and re-render the schedule table found in its response.
if ( 'POST' == $_SERVER[ 'REQUEST_METHOD' ] ) 
{
// missing fields fall back to '' instead of raising undefined-index notices
// @todo Filter! http://www.php.net/manual/en/filter.filters.validate.php
$from_stop = isset( $_POST[ 'FromStation' ] ) ? $_POST[ 'FromStation' ] : '';
$orig_station = $from_stop;

$to_stop = isset( $_POST[ 'ToStation' ] ) ? $_POST[ 'ToStation' ] : '';
$dest_station = $to_stop;

if (  $to_stop == $from_stop ) 
	{
	// a non-empty $error makes the form section further down redisplay the form with this message
	$error = 'Originating and Destination stops are the same.';
	}
else
	{

	print_ad( $orig_station, $dest_station, $location='top' );

	// not read again in this section; kept because the included files share this scope
	$travel_date = isset( $_POST[ 'RequestDate' ] ) ? $_POST[ 'RequestDate' ] : '';
	$requestTime = isset( $_POST[ 'RequestTime' ] ) ? $_POST[ 'RequestTime' ] : '';
	$am_pm       = isset( $_POST[ 'RequestAMPM' ] ) ? $_POST[ 'RequestAMPM' ] : '';
	$filter      = isset( $_POST[ 'sortBy' ] )      ? $_POST[ 'sortBy' ]      : '';

	/*
	* Lets post this to MTA.info
	*/

	$mta_curl = curl_init();

	$mta_curl_options = array
		(
		CURLOPT_FAILONERROR    => true,
		CURLOPT_FOLLOWLOCATION => false,
		// CURLOPT_MUTE           => true,
		CURLOPT_POST           => true,
		CURLOPT_RETURNTRANSFER => true,
		CURLOPT_CONNECTTIMEOUT => 30,
		CURLOPT_TIMEOUT        => 30,
		CURLOPT_URL            => $mta_post_url,
		CURLOPT_REFERER        => 'http://lirr42.mta.info/',
		CURLOPT_USERAGENT      => 'MTA Info Scrape/1.0',
		CURLOPT_POSTFIELDS     => http_build_query( $_POST )  // every posted field is relayed as-is
		);
	curl_setopt_array( $mta_curl, $mta_curl_options );
	$mta_response = curl_exec( $mta_curl );

	if ( false === $mta_response )
		{
		// collect the diagnostics BEFORE closing: the original closed the handle first
		// and then asked the closed handle for its error details
		$mta_failure = sprintf( 'Response Code:%s, Curl Error No:%s, Curl Error Message:%s', curl_getinfo( $mta_curl, CURLINFO_HTTP_CODE ), curl_errno( $mta_curl ), curl_error( $mta_curl ) );
		curl_close( $mta_curl );
		die( $mta_failure );
		}
	else
		{
		curl_close( $mta_curl );

		// raw response dump is a debugging aid only
		if ( DEBUG )
			echo '<pre>' . htmlentities( $mta_response ) . '</pre>';

		$html = new simple_html_dom();
		$html->load( $mta_response );

		// index 0 selects the FIRST table in the response.
		// NOTE(review): the original comment said "second table"; that would be index 1 - confirm which table holds the schedule.
		$schedule_table = $html->find( 'table', 0 );

		if ( ! $schedule_table )
			{
			// no table at all: report it instead of calling find() on nothing
			echo '<div class="error">No schedule table was found in the MTA response.</div>';
			}
		else
			{
			$row_count = 0;

			// will hold HTML output for table (header cells now sit inside a proper <tr>)
			$stopSchedule = '<table><tr><th>Departs</th><th>Arrives</th><th>Minutes</th><th>Transfer</th><th>Fare</th></tr>';

			foreach ( $schedule_table->find('tr') as $schedule_row ) 
				{
				$row_count++;
				// check for and skip header row
				if ( 1 == $row_count )
					continue;

				// skip empty rows, and the row whose first TD has a colspan attribute
				$first_cell = $schedule_row->children( 0 );
				if ( ! $first_cell || $first_cell->colspan )
					continue;

				// cells 0-6 are read below; a shorter row cannot be a schedule row
				if ( ! $schedule_row->children( 6 ) )
					continue;

				// extract table data for this row
				$depart_time      = $first_cell->innertext;
				$arrive_time      = $schedule_row->children( 2 )->innertext;
				$minutes_traveled = $schedule_row->children( 4 )->innertext;
				$transfer         = $schedule_row->children( 5 )->innertext;
				$fare             = $schedule_row->children( 6 )->plaintext; 
				// add table row to html output ("\r\n" replaces the reversed "\n\r")
				$stopSchedule .= sprintf( '<tr><td>%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>', $depart_time, $arrive_time, $minutes_traveled, $transfer, $fare ) . "\r\n";
				}  // end foreach schedule_row

			// add table end tag to html output
			$stopSchedule .= '</table>';

			// output table to browser
			echo $stopSchedule;
			} // end if schedule table found

		print_ad( $orig_station, $dest_station, $location='bottom' );

		} // end if mta response

	} // end if not same orig and dest 

} // end if POST	


// Show the search form on a plain page load, or again after a rejected submission (with its error message).
if ( 'GET' == $_SERVER[ 'REQUEST_METHOD' ] || ! empty( $error ) ) 
{

print_ad( null, null, $location = 'default' );

// the option list is built while loading the stops file; fall back to an empty list if it was never set
$stopOptionsHtml = isset( $stopSelectOptions ) ? $stopSelectOptions : '';

?>

<form method="post">

<?php

	if ( ! empty ( $error ) )
		{
		echo sprintf( '<div class="error">%s</div>', $error );
		}

?>

From:<br />
<select id="orig_station" name="FromStation" tabindex="2">
	<?php echo $stopOptionsHtml; ?>
</select>

<br />
<br />

To:<br />
<select id="dest_station" name="ToStation" tabindex="3">
	<?php echo $stopOptionsHtml; ?>
</select>

<br />
<br />

<input id="date1" name="RequestDate" size="12" maxlength="10" tabindex="4" value="<?php echo date( 'm/d/Y' ); ?>" />

<?php 
	// current time on the 12-hour clock, rounded down to the half hour, used to pre-select the dropdowns
	$currentHour         = (int) date( 'g' );                            // 1-12
	$currentMinutes      = (int) date( 'i' );
	$currentMinutesFloor = $currentMinutes - ( $currentMinutes % 30 );   // 0 or 30
	$currentAMPM         = date( 'A' );
?>	

<select name="RequestTime" tabindex="5">
	<?php
		// Emit 01:00, 01:30, ... 12:00, 12:30 by plain arithmetic. The earlier version stepped a
		// strtotime() timestamp by 30 minutes, which skips or repeats entries on the days the
		// server's timezone changes to or from daylight saving time.
		for ( $option_hour = 1; $option_hour <= 12; $option_hour++ )
			{
			foreach ( array( 0, 30 ) as $option_minutes )
				{
				$option_time     = sprintf( '%02d:%02d', $option_hour, $option_minutes );
				$option_selected = ( $option_hour == $currentHour && $option_minutes == $currentMinutesFloor );

				echo sprintf( '<option %s>%s</option>', ( $option_selected ) ? 'selected="selected"' : '',  $option_time ) . "\r\n";
				}
			}
	?>
</select>

<select name="RequestAMPM" tabindex="6">

	<option value="AM" <?php echo ( 'AM' == $currentAMPM ) ? 'selected="SELECTED"' : ''; ?>>AM</option>
	<option value="PM" <?php echo ( 'PM' == $currentAMPM ) ? 'selected="SELECTED"' : ''; ?>>PM</option>
</select>

<input name="sortBy" type="hidden" value="1" />	
<input type="submit" name="submit" value="Check Schedule" />
</form>


<!--  images/base/
ic_menu_back.png
ic_menu_home.png

-->
<?php 

}

require_once( 'footer.php' );

?>

Link to comment
Share on other sites

Can you use LiveHTTPHeaders to see the full request being made in firefox and post it here?

 

A request has a few parts - there will be a line like

 

GET /index.html HTTP/1.0

 

Then there will be the headers

 

Host: lirr42.mta.info

User-Agent: Mozilla/4.0

 

and so on.  Then finally is the content, which looks like:

 

RequestDate=2010-10-01&Foo=bar

 

Apparently there is a curl_getinfo() function which can tell you what headers curl sent, though I have not used this myself.  http://www.php.net/manual/en/function.curl-getinfo.php .  It needs you to set an option (CURLINFO_HEADER_OUT) before making the request.  If this works, you can use it to check for differences in the headers.  Some differences won't be important, but some might be.

Link to comment
Share on other sites

  • 1 month later...
This thread is more than a year old. Please don't revive it unless you have something important to add.

Join the conversation

You can post now and register later. If you have an account, sign in now to post with your account.

Guest
Reply to this topic...

×   Pasted as rich text.   Restore formatting

  Only 75 emoji are allowed.

×   Your link has been automatically embedded.   Display as a link instead

×   Your previous content has been restored.   Clear editor

×   You cannot paste images directly. Upload or insert images from URL.

×
×
  • Create New...

Important Information

We have placed cookies on your device to help make this website better. You can adjust your cookie settings, otherwise we'll assume you're okay to continue.