Jump to content

Logarithmic Best Fit - Negative X Values


Mark Baker

Recommended Posts

I've been working on a set of classes for calculating best-fit trend lines from a set of data points using the least-squares method.

 

For a logarithmic best fit, I need to take the log of the X-Values before executing the least-squares routine. The class that I've written is shown below.

<?php

/**
 * Least-squares logarithmic trend line of the form
 *
 *     Y = intersect + slope * ln(X)
 *
 * The regression is an ordinary linear least-squares fit of Y against ln(X),
 * so every X value must be strictly positive.
 */
class logarithmicBestFit
{
    private $_bestFitType = 'logarithmic';

    private $_xValues = array();

    private $_yValues = array();

    private $_yBestFitValues = array();

    private $_goodnessOfFit = 1;

    private $_stdevOfResiduals = 0;

    private $_slope = 0;

    //  Declared explicitly: it is assigned in _logarithmic_regression() and
    //  read by getIntersect().
    private $_intersect = 0;


    /**
     * @return string The name of this trend type ('logarithmic').
     */
    public function getBestFitType()
    {
        return $this->_bestFitType;
    }


    /**
     * Evaluate the fitted curve at $xValue.
     *
     * The fit is performed against ln(X), so the prediction has to apply the
     * same transform to X before using the slope and intersect.
     *
     * @param  float $xValue X coordinate (must be > 0 for a finite result)
     * @return float         Predicted Y
     */
    public function getValueOfYForX($xValue)
    {
        return $this->getSlope() * log($xValue) + $this->getIntersect();
    }


    /**
     * Inverse of getValueOfYForX(): the X at which the fitted curve reaches $yValue.
     *
     * @param  float $yValue
     * @return float
     */
    public function getValueOfXForY($yValue)
    {
        return exp(($yValue - $this->getIntersect()) / $this->getSlope());
    }


    /**
     * @return float Coefficient of ln(X) in the fitted equation.
     */
    public function getSlope()
    {
        return $this->_slope;
    }


    /**
     * @return float Constant term of the fitted equation.
     */
    public function getIntersect()
    {
        return $this->_intersect;
    }


    /**
     * @return float Coefficient of determination (R squared) of the fit.
     */
    public function getGoodnessOfFit()
    {
        return $this->_goodnessOfFit;
    }


    /**
     * @return float Standard deviation of the residuals (n - 2 degrees of freedom).
     */
    public function getStdevOfResiduals()
    {
        return $this->_stdevOfResiduals;
    }


    /**
     * @return array Fitted Y values, one per input X, in input order.
     */
    public function getYBestFitValues()
    {
        return $this->_yBestFitValues;
    }


    /**
     * Compute R squared and the standard deviation of the residuals from the
     * stored data points and the fitted slope/intersect.
     */
    private function _calculateGoodnessOfFit()
    {
        $nY = count($this->_yValues);
        $meanY = array_sum($this->_yValues) / $nY;

        //  $SSres: sum of squared residuals; $SStot: total sum of squares
        $SSres = $SStot = 0.0;
        foreach ($this->_xValues as $xKey => $xValue) {
            $residual = $this->_yValues[$xKey] - $this->getValueOfYForX($xValue);
            $deviation = $this->_yValues[$xKey] - $meanY;

            $SSres += $residual * $residual;
            $SStot += $deviation * $deviation;
        }

        //  With two points or fewer there are no degrees of freedom left, so
        //  report 0 rather than dividing by zero.
        $this->_stdevOfResiduals = ($nY > 2) ? sqrt($SSres / ($nY - 2)) : 0.0;

        //  R^2 = 1 - SSres / SStot. When all Y are identical (SStot == 0) the
        //  ratio is undefined and 1 is reported.
        $this->_goodnessOfFit = ($SStot == 0.0) ? 1 : 1 - ($SSres / $SStot);
    }


    /**
     * Fit slope and intersect by linear least squares of Y against ln(X), then
     * derive the goodness-of-fit figures and the fitted Y values.
     *
     * @param array $yValues Y coordinates
     * @param int   $nY      Number of points
     * @param array $xValues X coordinates, keyed like $yValues
     */
    private function _logarithmic_regression($yValues, $nY, $xValues)
    {
        //  ln(X) is only defined for X > 0; anything else would silently turn
        //  every result into -INF/NAN.
        if ((count($xValues) > 0) && (min($xValues) <= 0)) {
            trigger_error("logarithmic_regression(): X values must be greater than zero.", E_USER_ERROR);
        }

        foreach ($xValues as $key => $value) {
            $xValues[$key] = log($value);
        }

        //  Calculate sums over the transformed X values
        $x_sum = array_sum($xValues);
        $y_sum = array_sum($yValues);
        $xx_sum = $xy_sum = 0;
        foreach ($xValues as $key => $logX) {
            $xy_sum += $logX * $yValues[$key];
            $xx_sum += $logX * $logX;
        }

        //  Calculate slope
        $this->_slope = (($nY * $xy_sum) - ($x_sum * $y_sum)) / (($nY * $xx_sum) - ($x_sum * $x_sum));
        //  Calculate intersect
        $this->_intersect = ($y_sum - ($this->_slope * $x_sum)) / $nY;

        $this->_calculateGoodnessOfFit();

        foreach ($this->_xValues as $xValue) {
            $this->_yBestFitValues[] = $this->getValueOfYForX($xValue);
        }
    }


    /**
     * @param array $yValues Y coordinates of the data points
     * @param array $xValues X coordinates; defaults to 1..count($yValues) when empty
     */
    public function __construct($yValues, $xValues = array())
    {
        //  Calculate number of points
        $nY = count($yValues);
        $nX = count($xValues);

        //  Define X Values if necessary
        if ($nX == 0) {
            $xValues = range(1, $nY);
            $nX = $nY;
        }
        //  Ensure both arrays of points are the same size
        if ($nY != $nX) {
            trigger_error("logarithmic_regression(): Number of elements in coordinate arrays do not match.", E_USER_ERROR);
        }

        $this->_xValues = $xValues;
        $this->_yValues = $yValues;

        $this->_logarithmic_regression($yValues, $nY, $xValues);
    }

}	//	class logarithmicBestFit

?>

The problem arises when any of the X-Values are 0.0 or are negative.

I've tried adjusting the loop that sets the log of X with a set of if tests to return values of -1 for an X of 0.0, and -1 -(log(-1 - X)) where X is negative, but can't get the correctly restored values in the getValueOfYForX() and getValueOfXForY() methods.

 

I've also considered the possibility of offsetting X-Values to ensure that they're never 0 or below, but am not sure how this will affect the least-square calculation.

 

I'd appreciate any help in the best method to resolve this issue from someone with better math than myself.

Link to comment
https://forums.phpfreaks.com/topic/138460-logarithmic-best-fit-negative-x-values/
Share on other sites

I've also considered the possibility of offsetting X-Values to ensure that they're never 0 or below, but am not sure how this will affect the least-square calculation.

 

 

It will not. You will need to add the same offset to the argument of the logarithm in the calculated formula.

 

So, if you shift the X values by an offset 'o' and the fitted formula is y = a + b * ln(x), then your final formula will be y = a + b * ln(x + o).

I've tried offsetting the X-Values immediately prior to my least square calculation using:

// Shift each X by the stored offset, then overwrite it with the natural log of the shifted value.
foreach (array_keys($xValues) as $key) {
    $value = $xValues[$key] + $this->_Xoffset;
    $xValues[$key] = log($value);
}

Then modified the getValueOfYForX() method to use the same offset

/**
 * Evaluate the fitted curve at $xValue: the X offset is added before the
 * natural log is taken, then slope and intersect are applied.
 */
public function getValueOfYForX($xValue) {
    $shiftedLogX = log($xValue + $this->_Xoffset);

    return $this->getSlope() * $shiftedLogX + $this->getIntersect();
}	//	function getValueOfYForX()

but I'm getting totally different curves for different values of $this->_Xoffset: the higher the offset value, the closer I'm getting to a linear result.

 

Either I'm doing something totally wrong, or I need very specific offset values; or offsetting isn't the right way to go.

Please post some values, so that I can try myself (and see if I'm not wrong in this).

Current code:

/**
 * Least-squares logarithmic trend line with an automatic X offset:
 *
 *     Y = intersect + slope * ln(X + offset)
 *
 * When the smallest X is zero or negative, every X is shifted by
 * abs(min X) + 1 so that the smallest shifted value is 1 before logs are
 * taken; otherwise the offset stays 0.
 *
 * NOTE: the offset is part of the model. Y = a + b * ln(X + o) describes a
 * different curve for each o, so changing the offset changes the fitted
 * slope, intersect and goodness of fit.
 */
class logarithmicBestFit
{
    private $_bestFitType = 'logarithmic';

    private $_xValues = array();

    private $_yValues = array();

    private $_yBestFitValues = array();

    private $_goodnessOfFit = 1;

    private $_stdevOfResiduals = 0;

    private $_slope = 0;

    //  Declared explicitly: it is assigned in _logarithmic_regression() and
    //  read by getIntersect().
    private $_intersect = 0;

    private $_Xoffset = 0;


    /**
     * @return string The name of this trend type ('logarithmic').
     */
    public function getBestFitType()
    {
        return $this->_bestFitType;
    }


    /**
     * Evaluate the fitted curve at $xValue (the offset is applied internally).
     *
     * @param  float $xValue X coordinate in the caller's original scale
     * @return float         Predicted Y
     */
    public function getValueOfYForX($xValue)
    {
        return $this->getSlope() * log($xValue + $this->_Xoffset) + $this->getIntersect();
    }


    /**
     * Inverse of getValueOfYForX(): the X (original scale) at which the fitted
     * curve reaches $yValue.
     *
     * exp() recovers the shifted X, so the offset has to be removed again to
     * get back to the caller's scale.
     *
     * @param  float $yValue
     * @return float
     */
    public function getValueOfXForY($yValue)
    {
        return exp(($yValue - $this->getIntersect()) / $this->getSlope()) - $this->_Xoffset;
    }


    /**
     * Human-readable form of the fitted equation.
     *
     * @param  int    $dp Decimal places to round the coefficients to; 0 leaves them unrounded
     * @return string     e.g. 'Y = 2 + 3 * log(X)' or, with an offset, 'Y = 2 + 3 * log(X + 2)'
     */
    public function getEquation($dp = 0)
    {
        $slope = $this->getSlope();
        $intersect = $this->getIntersect();
        $offset = $this->_Xoffset;
        if ($dp != 0) {
            $slope = round($slope, $dp);
            $intersect = round($intersect, $dp);
            $offset = round($offset, $dp);
        }

        //  Only mention the offset when one was actually applied, so the
        //  output for all-positive X data is unchanged.
        $xTerm = ($this->_Xoffset != 0) ? 'X + '.$offset : 'X';

        return 'Y = '.$intersect.' + '.$slope.' * log('.$xTerm.')';
    }


    /**
     * @return float Coefficient of ln(X + offset) in the fitted equation.
     */
    public function getSlope()
    {
        return $this->_slope;
    }


    /**
     * @return float Constant term of the fitted equation.
     */
    public function getIntersect()
    {
        return $this->_intersect;
    }


    /**
     * @return float Coefficient of determination (R squared) of the fit.
     */
    public function getGoodnessOfFit()
    {
        return $this->_goodnessOfFit;
    }


    /**
     * @return float Standard deviation of the residuals (n - 2 degrees of freedom).
     */
    public function getStdevOfResiduals()
    {
        return $this->_stdevOfResiduals;
    }


    /**
     * @return array Fitted Y values, one per input X, in input order.
     */
    public function getYBestFitValues()
    {
        return $this->_yBestFitValues;
    }


    /**
     * Compute R squared and the standard deviation of the residuals from the
     * stored data points and the fitted slope/intersect.
     */
    private function _calculateGoodnessOfFit()
    {
        $nY = count($this->_yValues);
        $meanY = array_sum($this->_yValues) / $nY;

        //  $SSres: sum of squared residuals; $SStot: total sum of squares
        $SSres = $SStot = 0.0;
        foreach ($this->_xValues as $xKey => $xValue) {
            $residual = $this->_yValues[$xKey] - $this->getValueOfYForX($xValue);
            $deviation = $this->_yValues[$xKey] - $meanY;

            $SSres += $residual * $residual;
            $SStot += $deviation * $deviation;
        }

        //  With two points or fewer there are no degrees of freedom left, so
        //  report 0 rather than dividing by zero.
        $this->_stdevOfResiduals = ($nY > 2) ? sqrt($SSres / ($nY - 2)) : 0.0;

        //  R^2 = 1 - SSres / SStot. When all Y are identical (SStot == 0) the
        //  ratio is undefined and 1 is reported.
        $this->_goodnessOfFit = ($SStot == 0.0) ? 1 : 1 - ($SSres / $SStot);
    }


    /**
     * Choose the X offset, fit slope and intersect by linear least squares of
     * Y against ln(X + offset), then derive the goodness-of-fit figures and
     * the fitted Y values.
     *
     * @param array $yValues Y coordinates
     * @param int   $nY      Number of points
     * @param array $xValues X coordinates, keyed like $yValues
     */
    private function _logarithmic_regression($yValues, $nY, $xValues)
    {
        //  Shift the data so the smallest X becomes 1 when it would otherwise
        //  be outside the domain of log().
        $minX = min($xValues);
        if ($minX <= 0.0) {
            $this->_Xoffset = abs($minX) + 1;
        }

        foreach ($xValues as $key => $value) {
            $xValues[$key] = log($value + $this->_Xoffset);
        }

        //  Calculate sums over the transformed X values
        $x_sum = array_sum($xValues);
        $y_sum = array_sum($yValues);
        $xx_sum = $xy_sum = 0;
        foreach ($xValues as $key => $logX) {
            $xy_sum += $logX * $yValues[$key];
            $xx_sum += $logX * $logX;
        }

        //  Calculate slope
        $this->_slope = (($nY * $xy_sum) - ($x_sum * $y_sum)) / (($nY * $xx_sum) - ($x_sum * $x_sum));
        //  Calculate intersect
        $this->_intersect = ($y_sum - ($this->_slope * $x_sum)) / $nY;

        $this->_calculateGoodnessOfFit();

        //  Iterate the X values in their original order so that each fitted Y
        //  lines up with the corresponding input Y.
        foreach ($this->_xValues as $xValue) {
            $this->_yBestFitValues[] = $this->getValueOfYForX($xValue);
        }
    }


    /**
     * @param array $yValues Y coordinates of the data points
     * @param array $xValues X coordinates; defaults to 1..count($yValues) when empty
     */
    public function __construct($yValues, $xValues = array())
    {
        //  Calculate number of points
        $nY = count($yValues);
        $nX = count($xValues);

        //  Define X Values if necessary
        if ($nX == 0) {
            $xValues = range(1, $nY);
            $nX = $nY;
        }
        //  Ensure both arrays of points are the same size
        if ($nY != $nX) {
            trigger_error("logarithmic_regression(): Number of elements in coordinate arrays do not match.", E_USER_ERROR);
        }

        $this->_xValues = $xValues;
        $this->_yValues = $yValues;

        $this->_logarithmic_regression($yValues, $nY, $xValues);
    }

}	//	class logarithmicBestFit

 

Test data with negative values:

//  Sample points keyed by X (kept as strings so the fractional keys survive
//  as array keys) with the observed Y as the value. Includes negative and
//  zero X values.
$testData = array(
    '-2.5' => 2.035,
    '-2.0' => 2.107,
    '-1.5' => 2.187,
    '-1.0' => 2.279,
    '-0.5' => 2.382,
    '0.0'  => 2.5,
    '0.5'  => 2.633,
    '1.0'  => 2.784,
    '1.5'  => 2.955,
    '2.0'  => 3.149,
    '2.5'  => 3.368,
    '3.0'  => 3.617,
    '3.5'  => 3.899,
    '4.0'  => 4.218,
    '4.5'  => 4.58,
    '5.0'  => 4.99
);

//  Convert the string keys back into numeric X values
$xValues = array_map('floatval', array_keys($testData));

$bestFitLogarithmic = new logarithmicBestFit(array_values($testData), $xValues);

//  The terminating semicolon was missing here, which made the script a parse error
print_r($bestFitLogarithmic->getYBestFitValues());
echo '<hr />';
echo 'Slope = '.$bestFitLogarithmic->getSlope().'<br />';
echo 'Intersect = '.$bestFitLogarithmic->getIntersect().'<br />';
echo $bestFitLogarithmic->getEquation().'<br />';
echo '<br />';
echo 'Goodness of fit (R<sup>2</sup>) = '.$bestFitLogarithmic->getGoodnessOfFit().'<br />';
echo 'StdDev of Residuals = '.$bestFitLogarithmic->getStdevOfResiduals();

 

Any change to $this->_Xoffset = abs($mArray[0]) + 1 is giving different results.

Archived

This topic is now archived and is closed to further replies.

×
×
  • Create New...

Important Information

We have placed cookies on your device to help make this website better. You can adjust your cookie settings, otherwise we'll assume you're okay to continue.