Mark Baker Posted December 26, 2008 Share Posted December 26, 2008 I've been doing some work on a set of classes for calculating best fit trend lines from a set of data points using least square. For a logarithmic best fit, I need to get the log of the X-Values before executing the least square routine. The class that I've written is shown below.
<?php

/**
 * Least-squares logarithmic trend line: fits Y = intersect + slope * ln(X).
 *
 * The regression is run on ln(X), so every X value must be strictly positive:
 * log() yields -INF for 0 and NAN for a negative argument, and either one
 * poisons the sums used to derive the slope and intersect.
 */
class logarithmicBestFit
{
    private $_bestFitType = 'logarithmic';

    private $_xValues = array();

    private $_yValues = array();

    private $_yBestFitValues = array();

    private $_goodnessOfFit = 1;

    private $_stdevOfResiduals = 0;

    private $_slope = 0;

    // Declared explicitly so it is not created as a dynamic property on first write
    private $_intersect = 0;

    /**
     * @return string Name of the fitted model ('logarithmic')
     */
    public function getBestFitType()
    {
        return $this->_bestFitType;
    }

    /**
     * Evaluate the fitted curve at X.
     *
     * Uses ln(X), matching the transformation applied before the regression
     * and the exp() inverse used by getValueOfXForY().
     *
     * @param float $xValue X value (must be > 0)
     * @return float Fitted Y value
     */
    public function getValueOfYForX($xValue)
    {
        return $this->getSlope() * log($xValue) + $this->getIntersect();
    }

    /**
     * Inverse of getValueOfYForX(): the X at which the fitted curve reaches Y.
     *
     * @param float $yValue Y value
     * @return float X value
     */
    public function getValueOfXForY($yValue)
    {
        return exp(($yValue - $this->getIntersect()) / $this->getSlope());
    }

    /**
     * @return float Coefficient of ln(X) in the fitted equation
     */
    public function getSlope()
    {
        return $this->_slope;
    }

    /**
     * @return float Constant term of the fitted equation
     */
    public function getIntersect()
    {
        return $this->_intersect;
    }

    /**
     * @return float Coefficient of determination (R squared)
     */
    public function getGoodnessOfFit()
    {
        return $this->_goodnessOfFit;
    }

    /**
     * @return float Standard deviation of the residuals
     */
    public function getStdevOfResiduals()
    {
        return $this->_stdevOfResiduals;
    }

    /**
     * @return array Fitted Y value for each supplied X value, in input order
     */
    public function getYBestFitValues()
    {
        return $this->_yBestFitValues;
    }

    /**
     * Compute R squared and the residual standard deviation for the current fit.
     */
    private function _calculateGoodnessOfFit()
    {
        $nY = count($this->_yValues);
        $meanY = array_sum($this->_yValues) / $nY;

        $ssResidual = $ssTotal = 0.0;
        foreach ($this->_xValues as $xKey => $xValue) {
            $residual = $this->_yValues[$xKey] - $this->getValueOfYForX($xValue);
            $deviation = $this->_yValues[$xKey] - $meanY;
            $ssResidual += $residual * $residual;
            $ssTotal += $deviation * $deviation;
        }

        // Two fitted parameters leave nY - 2 degrees of freedom; with only
        // two points the fit is exact and the division would be by zero.
        $this->_stdevOfResiduals = ($nY > 2) ? sqrt($ssResidual / ($nY - 2)) : 0.0;

        // R^2 = 1 - SSres / SStot. Only a zero total is special-cased:
        // SSres == SStot must yield 0 (no better than the mean), not 1.
        if ($ssTotal == 0.0) {
            $this->_goodnessOfFit = 1;
        } else {
            $this->_goodnessOfFit = 1 - ($ssResidual / $ssTotal);
        }
    }

    /**
     * Ordinary least-squares regression of Y against ln(X).
     *
     * @param array $yValues Y values
     * @param int   $nY      Number of data points
     * @param array $xValues X values (each must be > 0)
     */
    private function _logarithmic_regression($yValues, $nY, $xValues)
    {
        // Re-index so the positional sums below work for any input keys
        $logX = array_map('log', array_values($xValues));
        $yValues = array_values($yValues);

        // calculate sums
        $x_sum = array_sum($logX);
        $y_sum = array_sum($yValues);
        $xx_sum = $xy_sum = 0;
        for ($i = 0; $i < $nY; $i++) {
            $xy_sum += $logX[$i] * $yValues[$i];
            $xx_sum += $logX[$i] * $logX[$i];
        }

        // calculate slope
        $this->_slope = (($nY * $xy_sum) - ($x_sum * $y_sum)) / (($nY * $xx_sum) - ($x_sum * $x_sum));

        // calculate intersect
        $this->_intersect = ($y_sum - ($this->_slope * $x_sum)) / $nY;

        $this->_calculateGoodnessOfFit();

        foreach ($this->_xValues as $xValue) {
            $this->_yBestFitValues[] = $this->getValueOfYForX($xValue);
        }
    }

    /**
     * Fit the trend line to the supplied points.
     *
     * @param array $yValues Y values
     * @param array $xValues X values; defaults to 1..count($yValues) when empty
     */
    public function __construct($yValues, $xValues=array())
    {
        // Calculate number of points
        $nY = count($yValues);
        $nX = count($xValues);

        // Define X Values if necessary
        if ($nX == 0) {
            $xValues = range(1, $nY);
            $nX = $nY;
        }

        // Ensure both arrays of points are the same size
        if ($nY != $nX) {
            trigger_error("logarithmic_regression(): Number of elements in coordinate arrays do not match.", E_USER_ERROR);
        }

        // Store both series as lists so X and Y are paired by position
        $this->_xValues = array_values($xValues);
        $this->_yValues = array_values($yValues);

        $this->_logarithmic_regression($this->_yValues, $nY, $this->_xValues);
    }
} // class logarithmicBestFit

?>
The problem arises when any of the X-Values are 0.0 or are negative. I've tried adjusting the loop that sets the log of X with a set of if tests to return values of -1 for an X of 0.0, and -1 -(log(-1 - X)) where X is negative, but can't get the correctly restored values in the getValueOfYForX() and getValueOfXForY() methods. I've also considered the possibility of offsetting X-Values to ensure that they're never 0 or below, but am not sure how this will affect the least-square calculation. 
I'd appreciate any help in the best method to resolve this issue from someone with better math than myself. Link to comment https://forums.phpfreaks.com/topic/138460-logarithmic-best-fit-negative-x-values/ Share on other sites More sharing options...
Mchl Posted December 26, 2008 Share Posted December 26, 2008 "I've also considered the possibility of offsetting X-Values to ensure that they're never 0 or below, but am not sure how this will affect the least-square calculation." It will not. You will need to add the offset to the argument of the calculated formula. So, if you offset the values by an offset 'o', and the calculated formula is $y = a + b \ln(x)$, then your final formula will be $y = a + b \ln(x + o)$. Link to comment https://forums.phpfreaks.com/topic/138460-logarithmic-best-fit-negative-x-values/#findComment-723925 Share on other sites More sharing options...
Mark Baker Posted December 26, 2008 Author Share Posted December 26, 2008 I've tried offsetting the X-Values immediately prior to my least square calculation using: foreach($xValues as $key => $value) { $value += $this->_Xoffset; $xValues[$key] = log($value); } Then modified the getValueOfYForX() method to use the same offset public function getValueOfYForX($xValue) { return $this->getSlope() * log($xValue + $this->_Xoffset) + $this->getIntersect(); } // function getValueOfYForX() but I'm getting totally different curves for different values of $this->_Xoffset: the higher the offset value, the closer I'm getting to a linear result. Either I'm doing something totally wrong, or I need very specific offset values; or offsetting isn't the right way to go. Link to comment https://forums.phpfreaks.com/topic/138460-logarithmic-best-fit-negative-x-values/#findComment-724169 Share on other sites More sharing options...
Mchl Posted December 26, 2008 Share Posted December 26, 2008 Please post some values, so that I can try myself (and see if I'm not wrong in this). Link to comment https://forums.phpfreaks.com/topic/138460-logarithmic-best-fit-negative-x-values/#findComment-724177 Share on other sites More sharing options...
Mark Baker Posted December 26, 2008 Author Share Posted December 26, 2008 Please post some values, so that I can try myself (and see if I'm not wrong in this). Current code:
<?php

/**
 * Least-squares logarithmic trend line with an automatic X offset.
 *
 * When the smallest X value is zero or negative, every X is shifted by
 * abs(min X) + 1 before taking the log, so the smallest shifted value is
 * exactly 1. The fitted model is then
 *     Y = intersect + slope * ln(X + offset)
 * and the slope and intersect are specific to the offset that was applied.
 */
class logarithmicBestFit
{
    private $_bestFitType = 'logarithmic';

    private $_xValues = array();

    private $_yValues = array();

    private $_yBestFitValues = array();

    private $_goodnessOfFit = 1;

    private $_stdevOfResiduals = 0;

    private $_slope = 0;

    // Declared explicitly so it is not created as a dynamic property on first write
    private $_intersect = 0;

    private $_Xoffset = 0;

    /**
     * @return string Name of the fitted model ('logarithmic')
     */
    public function getBestFitType()
    {
        return $this->_bestFitType;
    }

    /**
     * Evaluate the fitted curve at X (the offset is applied internally).
     *
     * @param float $xValue X value in the caller's original scale
     * @return float Fitted Y value
     */
    public function getValueOfYForX($xValue)
    {
        return $this->getSlope() * log($xValue + $this->_Xoffset) + $this->getIntersect();
    }

    /**
     * Inverse of getValueOfYForX(): the X at which the fitted curve reaches Y.
     *
     * exp() recovers X + offset, so the offset is subtracted again to return
     * X in the caller's original scale.
     *
     * @param float $yValue Y value
     * @return float X value
     */
    public function getValueOfXForY($yValue)
    {
        return exp(($yValue - $this->getIntersect()) / $this->getSlope()) - $this->_Xoffset;
    }

    /**
     * Render the fitted equation as text.
     *
     * @param int $dp Decimal places for slope and intersect; 0 leaves them unrounded
     * @return string e.g. 'Y = 1 + 2 * log(X)', or 'log(X + 3.5)' when an offset applies
     */
    public function getEquation($dp=0)
    {
        $slope = $this->getSlope();
        $intersect = $this->getIntersect();
        if ($dp != 0) {
            $slope = round($slope, $dp);
            $intersect = round($intersect, $dp);
        }

        // Show the offset so the printed equation matches getValueOfYForX()
        $xTerm = ($this->_Xoffset != 0) ? 'X + '.$this->_Xoffset : 'X';

        return 'Y = '.$intersect.' + '.$slope.' * log('.$xTerm.')';
    }

    /**
     * @return float Coefficient of the log term in the fitted equation
     */
    public function getSlope()
    {
        return $this->_slope;
    }

    /**
     * @return float Constant term of the fitted equation
     */
    public function getIntersect()
    {
        return $this->_intersect;
    }

    /**
     * @return float Coefficient of determination (R squared)
     */
    public function getGoodnessOfFit()
    {
        return $this->_goodnessOfFit;
    }

    /**
     * @return float Standard deviation of the residuals
     */
    public function getStdevOfResiduals()
    {
        return $this->_stdevOfResiduals;
    }

    /**
     * @return array Fitted Y values, ordered by ascending X value
     */
    public function getYBestFitValues()
    {
        return $this->_yBestFitValues;
    }

    /**
     * Compute R squared and the residual standard deviation for the current fit.
     */
    private function _calculateGoodnessOfFit()
    {
        $nY = count($this->_yValues);
        $meanY = array_sum($this->_yValues) / $nY;

        $ssResidual = $ssTotal = 0.0;
        foreach ($this->_xValues as $xKey => $xValue) {
            $residual = $this->_yValues[$xKey] - $this->getValueOfYForX($xValue);
            $deviation = $this->_yValues[$xKey] - $meanY;
            $ssResidual += $residual * $residual;
            $ssTotal += $deviation * $deviation;
        }

        // Two fitted parameters leave nY - 2 degrees of freedom; with only
        // two points the fit is exact and the division would be by zero.
        $this->_stdevOfResiduals = ($nY > 2) ? sqrt($ssResidual / ($nY - 2)) : 0.0;

        // R^2 = 1 - SSres / SStot. Only a zero total is special-cased:
        // SSres == SStot must yield 0 (no better than the mean), not 1.
        if ($ssTotal == 0.0) {
            $this->_goodnessOfFit = 1;
        } else {
            $this->_goodnessOfFit = 1 - ($ssResidual / $ssTotal);
        }
    }

    /**
     * Ordinary least-squares regression of Y against ln(X + offset).
     *
     * @param array $yValues Y values
     * @param int   $nY      Number of data points
     * @param array $xValues X values
     */
    private function _logarithmic_regression($yValues, $nY, $xValues)
    {
        // Sorted copy: element 0 is the smallest X
        $mArray = $xValues;
        sort($mArray, SORT_NUMERIC);
        if ($mArray[0] <= 0.0) {
            $this->_Xoffset = abs($mArray[0]) + 1;
        }

        // Re-index so the positional sums below work for any input keys
        $yValues = array_values($yValues);
        $logX = array();
        foreach (array_values($xValues) as $value) {
            $logX[] = log($value + $this->_Xoffset);
        }

        // calculate sums
        $x_sum = array_sum($logX);
        $y_sum = array_sum($yValues);
        $xx_sum = $xy_sum = 0;
        for ($i = 0; $i < $nY; $i++) {
            $xy_sum += $logX[$i] * $yValues[$i];
            $xx_sum += $logX[$i] * $logX[$i];
        }

        // calculate slope
        $this->_slope = (($nY * $xy_sum) - ($x_sum * $y_sum)) / (($nY * $xx_sum) - ($x_sum * $x_sum));

        // calculate intersect
        $this->_intersect = ($y_sum - ($this->_slope * $x_sum)) / $nY;

        $this->_calculateGoodnessOfFit();

        // Best-fit values are emitted in ascending-X order
        foreach ($mArray as $xValue) {
            $this->_yBestFitValues[] = $this->getValueOfYForX($xValue);
        }
    }

    /**
     * Fit the trend line to the supplied points.
     *
     * @param array $yValues Y values
     * @param array $xValues X values; defaults to 1..count($yValues) when empty
     */
    public function __construct($yValues, $xValues=array())
    {
        // Calculate number of points
        $nY = count($yValues);
        $nX = count($xValues);

        // Define X Values if necessary
        if ($nX == 0) {
            $xValues = range(1, $nY);
            $nX = $nY;
        }

        // Ensure both arrays of points are the same size
        if ($nY != $nX) {
            trigger_error("logarithmic_regression(): Number of elements in coordinate arrays do not match.", E_USER_ERROR);
        }

        // Store both series as lists so X and Y are paired by position
        $this->_xValues = array_values($xValues);
        $this->_yValues = array_values($yValues);

        $this->_logarithmic_regression($this->_yValues, $nY, $this->_xValues);
    }
} // class logarithmicBestFit

?>
Test data with negative values:
<?php

$testData = array(
    '-2.5' => 2.035,
    '-2.0' => 2.107,
    '-1.5' => 2.187,
    '-1.0' => 2.279,
    '-0.5' => 2.382,
    '0.0' => 2.5,
    '0.5' => 2.633,
    '1.0' => 2.784,
    '1.5' => 2.955,
    '2.0' => 3.149,
    '2.5' => 3.368,
    '3.0' => 3.617,
    '3.5' => 3.899,
    '4.0' => 4.218,
    '4.5' => 4.58,
    '5.0' => 4.99
);

// The X values are written as string keys; convert them to floats
$xValues = array_map('floatval', array_keys($testData));

$bestFitLogarithmic = new logarithmicBestFit(array_values($testData), $xValues);

print_r($bestFitLogarithmic->getYBestFitValues());
echo '<hr />';
echo 'Slope = '.$bestFitLogarithmic->getSlope().'<br />';
echo 'Intersect = '.$bestFitLogarithmic->getIntersect().'<br />';
echo $bestFitLogarithmic->getEquation().'<br />';
echo '<br />';
echo 'Goodness of fit (R<sup>2</sup>) = '.$bestFitLogarithmic->getGoodnessOfFit().'<br />';
echo 'StdDev of Residuals = '.$bestFitLogarithmic->getStdevOfResiduals();

?>
Any change to $this->_Xoffset = abs($mArray[0]) + 1 is giving different results. Link to comment https://forums.phpfreaks.com/topic/138460-logarithmic-best-fit-negative-x-values/#findComment-724187 Share on other sites More sharing options...
Recommended Posts
Archived
This topic is now archived and is closed to further replies.