Jump to content

different preg_replace outcomes inside and outside class method


findshorty

Recommended Posts

i've built a dynamic pdf creation script that removes all the microsoft guff from batch converted doc files (to html), and replaces various special characters that aren't supported in pdf.

 

the class does the following:

 

it is passed the filename of the file to be cleaned up and outputted

file is retrieved into a string using either file_get_contents or using an output buffer (results the same in either case)

string is passed to "fix" function that performs all the various cleanup operations on the string and returns the fixed string

string is then outputted (or saved) as a pdf using dompdf (working fine, problem is nothing to do with dompdf)

 

now - all the preg_replace's that i've used for special characters work perfectly well outside of my class, for example to remove all incorrectly encoded trademark symbols, but once inside the class, a few of them fail.

 

the preg_replace calls are absolutely identical outside and inside the class, so i'm completely baffled as to what is going on

 

<?php
// other stuff
// ...
// ...
// include the file to be fixed
ob_start();
include "report_".$fileid.".html";
$report = ob_get_contents();
ob_end_clean();

// this is the absolutely vital preg_replace that works fine outside, but doesn't work inside the class
$pattern = array('/ò/','/ñ/','/™/');
$replace = array("down","drive",$trade);
$report = preg_replace($pattern,$replace,$report);

// more preg_replacements, some of them fail inside the class again

// return the fixed report
// or output to dompdf if outside of the class
return $report;
?>

 

are included strings encoded differently when inside a class?? surely not, but its all i can think of

 

any help or opinions most appreciated!

cheers

Link to comment
Share on other sites

many of the preg_replace's inside the fix_report function fail, however they work when outside of the class, which is very frustrating and for me, a little bit wierd.

 

maybe some fresh eyes might see what the problem is!

 

<?php
class reportbuilder {
        function build($using) {
	global $IDQ,$CFG;

	// get the 3 required variables
	$this->id_score = $using["id_score"];
	$this->first_name = $using["first_name"];
	$this->surname = $using["surname"];

	$this->IsHTML = $using["inhtml"];

	// new filename
	$this->filename = $this->first_name."_".$this->surname."_".$this->id_score."_I.D.Report.pdf";

	// and return false if any of them are empty
	if(!$this->id_score||!$this->first_name||!$this->surname)
		return false;

	// start the output string
	$this->output = array();

	// get the various generic templates
	$template_arr = array("header","coverpage","idchart","howto","welcome","needs","talents","vulnerabilities","notes","footer");
	foreach($template_arr as $f)
		$this->output[$f] = $this->get_template($f) or die("could not find template $f");

	// get the report itself
	$report_orig = $this->get_report($this->id_score);
	$report = $this->fix_report($report_orig);
	$report_item = array("report"=>$report);

	// splice the report into the output array
	array_splice($this->output,5,0,$report_item);

	// and now replace all id_scores, first_names and surnames
	$rem = array("%%first_name%%","%%surname%%","%%id%%");
	$rep = array($this->first_name,$this->surname,$this->id_score);
	foreach($this->output as $key=>$out)
		$this->output[$key] = str_replace($rem,$rep,$out);

}
function validate($using) {
	$valid = true;
	$req = array("first_name","surname","id_score","id_cdate");
	if($using["ms"])
		$req[] = "gender";
	foreach($req as $field) {
		if(!$using[$field]) {
			$valid = false;
			break;
		}
	}
	return $valid;
}
function output($ispdf=false,$stream=false) {
	$this->html = "";
	foreach($this->output as $out)
		$this->html .= $out;
	if(!$ispdf) {
		echo $this->html;
	} else {
		require_once(dirname(dirname(__FILE__))."/dompdf/dompdf_config.inc.php");
		$this->dompdf = new DOMPDF();
		$this->dompdf->load_html($this->html);
		$this->dompdf->render();
		if(!$stream) {
			$this->filepath = dirname(dirname(__FILE__))."/idq/temp/".$this->filename;
			return file_put_contents($this->filepath,$this->dompdf->output());
		} else {
			$this->stream();
		}
	}
}
function stream() {
	$this->dompdf->stream($this->filename);
}
function get_template($tpl) {
	if(!file_exists(dirname(dirname(__FILE__))."/idq/generic_templates/$tpl.tpl.php"))
		return false;
	ob_start();
	include dirname(dirname(__FILE__))."/idq/generic_templates/$tpl.tpl.php";
	$str = ob_get_contents();
	ob_end_clean();
	return $str;
}
function get_report($id_score) {
	if(!file_exists(dirname(dirname(__FILE__))."/idq/report_templates/ID_Report_".$id_score.".html"))
		return false;
	ob_start();
	include dirname(dirname(__FILE__))."/idq/report_templates/ID_Report_".$id_score.".html";
	$str = ob_get_contents();
	ob_end_clean();
	return $str;
}
function fix_report($report) {

	$pattern = array('/ò/','/ñ/','/™/');
	$replace = array("down","drive",$trade);
	$report = preg_replace($pattern,$replace,$report);

	// first, remove all empty tags
	$report = preg_replace('#<(\w+)[^>]*>\s*</\1>#im',' ', $report);

	// remove the style sheet
	$report = preg_replace('/<style [^>]*>(.*)<\/style>/s','',$report);

	// replace the header portion of the report
	$pattern_arr = array('/<p [^>]*><span [^>]*>(.*)(Personalised I.D. Report for)(.*)<\/span><\/p>/',
				'/<p [^>]*><span [^>]*>(.*)(<firstname> <surname>)(.*)<\/span><\/p>/',
				'/<p [^>]*><span [^>]*>(.*)(\d{4,4})(.*)<\/span><\/p>/');
	$replace_arr = array("<div style='border-top:solid 3px #000;border-bottom:solid 3px #000;padding:2px;font-size:1.2em;'>
		<div style='border-top:solid 1px #000;border-bottom:solid 1px #000; padding:20px 0;text-align:center'>
			<i>$2</i><br/>",
			"<b>$2<br/>",
			"$id</b></div></div>");
	$report = preg_replace($pattern_arr,$replace_arr,$report);

	// make headers
	$pattern = '/<p[^>]*>.*?(YOUR NEEDS|NATURAL TALENTS|NATURAL VULNERABILITIES|RECOMMENDATIONS|THE NEXT FEW PAGES|YOUR FUTURE DIRECTION).*?<\/p>/';
	$report = preg_replace($pattern,"<h1>$1</h1>",$report);

	// now we preg_replace all the "Relevant Strategies"
	$pattern = '/<p [^>]*><span [^>]*>(Relevant Strategies)<\/span><\/p>/s';
	$report = preg_replace($pattern,'<div class="strategy"><h3>$1</h3>',$report);
	$pattern = '/<ol [^>]*>(.*?)<\/ol>/isx';
	$report = preg_replace($pattern,'<ol>$1</ol></div>',$report);

	// and "Relevant Strategy"
	$pattern = '/<p [^>]*><span [^>]*>(Relevant Strategy)<\/span><\/p>(.*?)<p [^>]*>(.*?)<\/p>/s';
	$report = preg_replace($pattern,'<div class="strategy"><h3>$1</h3><p>$3</p></div>',$report);

	// replace all ul-li-/li-/ul combinations with h2
	//<ul>\s*<li>(??!<li>).)*?<\/li>\s*<\/ul>
	$pattern = '/<ul[^>]*>\s*<li[^>]*>((??!<li[^>]*>).){1,128})<\/li>\s*<\/ul>/';
	$report = preg_replace($pattern,"<h2>$1</h2>",$report);

	// replace single child <li> with a top border
	$pattern = '/<ul[^>]*>\s*<li[^>]*>((??!<li[^>]*>).)*?)<\/li>\s*<\/ul>/s';
	$report = preg_replace($pattern,"<p style='border-top:dotted 1px #000;padding-top:10px'>$1</p>",$report);

	// now remove all span tags
	$report = preg_replace('/<[\/]?span[^>]*>/','',$report);

	// remove all classes
	$report = preg_replace('/ class=rv[^>]*/','',$report);

	// and remove all empty tags again
	$report = preg_replace('#<(\w+)[^>]*>\s*</\1>#im',' ', $report);

	//$vaci = array("Verify","Authenticate","Complete","Improvise");
	$pattern = array('/(drive) (Verify|Authenticate|Complete|Improvise)/','/(down) (Verify|Authenticate|Complete|Improvise)/','/™/');
	$replace = array(" <b><font face='ZapfDingbats'>s</font> $2</b> "," <b><font face='ZapfDingbats'>t</font> $2</b> ");
	$report = preg_replace($pattern,$replace,$report);

	// now remove all page breaks from the report
	$pattern = '/style="page-break[^>]*>/';
	$report = preg_replace($pattern,'',$report);

	// finally remove the body and head tags
	// which will also remove the title and meta tags
	$pattern = '/<head>.*<\/head>|<\/?html>|<\/?body>|<!DOCTYPE[^>]*>/s';
	$report = preg_replace($pattern,'',$report);

	$R = array("<firstname>"=>"Ian",
				"<surname>"=>"Young",
				"…"=>" ... ",
				"“"=>'"',
				"”"=>'"',
				"–"=>"-",
				"’"=>"'",
				"‘"=>"'",
				'"?'=>'?"',
				' style="{ margin-left: 0 }'=>"",
				"background: #000000;" => "background: #000000;color:#fff",
				"<br>"=>"",
				" "=>" ",
				"<p></p>"=>" ",
				"<p ></p>"=>" "
				);
	$report = str_replace(array_keys($R),$R,$report);

	// and return it
	return $report;
}
}
?>

 

the following works perfectly

 

<?php
ob_start();
include "report_templates/ID_Report_".$_GET["id"].".html";
$report = ob_get_contents();
ob_end_clean();

//$report = file_get_contents("report_templates/ID_Report_2387.html");

//var_dump(strpos($report,'ñ'));

$pattern = array('/ò/','/ñ/','/™/');
$replace = array("down","drive",$trade);

$report = preg_replace($pattern,$replace,$report);

// first, remove all empty tags
$report = preg_replace('#<(\w+)[^>]*>\s*</\1>#im',' ', $report);

// remove the style sheet
$report = preg_replace('/<style [^>]*>(.*)<\/style>/s','',$report);

// replace the header portion of the report
$pattern_arr = array('/<p [^>]*><span [^>]*>(.*)(Personalised I.D. Report for)(.*)<\/span><\/p>/',
				'/<p [^>]*><span [^>]*>(.*)(<firstname> <surname>)(.*)<\/span><\/p>/',
				'/<p [^>]*><span [^>]*>(.*)(\d{4,4})(.*)<\/span><\/p>/');
$replace_arr = array("<div style='border-top:solid 3px #000;border-bottom:solid 3px #000;padding:2px;font-size:1.2em;'>
		<div style='border-top:solid 1px #000;border-bottom:solid 1px #000; padding:20px 0;text-align:center'>
			<i>$2</i><br/>",
			"<b>$2<br/>",
			"$id</b></div></div>");
$report = preg_replace($pattern_arr,$replace_arr,$report);

// make headers
$pattern = '/<p[^>]*>.*?(YOUR NEEDS|NATURAL TALENTS|NATURAL VULNERABILITIES|RECOMMENDATIONS|THE NEXT FEW PAGES|YOUR FUTURE DIRECTION).*?<\/p>/';
$report = preg_replace($pattern,"<h1>$1</h1>",$report);

// now we preg_replace all the "Relevant Strategies"
$pattern = '/<p [^>]*><span [^>]*>(Relevant Strategies)<\/span><\/p>/s';
$report = preg_replace($pattern,'<div class="strategy"><h3>$1</h3>',$report);
$pattern = '/<ol [^>]*>(.*?)<\/ol>/isx';
$report = preg_replace($pattern,'<ol>$1</ol></div>',$report);

// and "Relevant Strategy"
$pattern = '/<p [^>]*><span [^>]*>(Relevant Strategy)<\/span><\/p>(.*?)<p [^>]*>(.*?)<\/p>/s';
$report = preg_replace($pattern,'<div class="strategy"><h3>$1</h3><p>$3</p></div>',$report);

// replace all ul-li-/li-/ul combinations with h2
//<ul>\s*<li>(??!<li>).)*?<\/li>\s*<\/ul>
$pattern = '/<ul[^>]*>\s*<li[^>]*>((??!<li[^>]*>).){1,128})<\/li>\s*<\/ul>/';
$report = preg_replace($pattern,"<h2>$1</h2>",$report);

// replace single child <li> with a top border
$pattern = '/<ul[^>]*>\s*<li[^>]*>((??!<li[^>]*>).)*?)<\/li>\s*<\/ul>/s';
$report = preg_replace($pattern,"<p style='border-top:dotted 1px #000;padding-top:10px'>$1</p>",$report);

// now remove all span tags
$report = preg_replace('/<[\/]?span[^>]*>/','',$report);

// remove all classes
$report = preg_replace('/ class=rv[^>]*/','',$report);

// and remove all empty tags again
$report = preg_replace('#<(\w+)[^>]*>\s*</\1>#im',' ', $report);

//$vaci = array("Verify","Authenticate","Complete","Improvise");
$pattern = array('/(drive) (Verify|Authenticate|Complete|Improvise)/','/(down) (Verify|Authenticate|Complete|Improvise)/','/™/');
$replace = array(" <b><font face='ZapfDingbats'>s</font> $2</b> "," <b><font face='ZapfDingbats'>t</font> $2</b> ");
$report = preg_replace($pattern,$replace,$report);

// now remove all page breaks from the report
$pattern = '/style="page-break[^>]*>/';
$report = preg_replace($pattern,'',$report);

// finally remove the body and head tags
// which will also remove the title and meta tags
$pattern = '/<head>.*<\/head>|<\/?html>|<\/?body>|<!DOCTYPE[^>]*>/s';
$report = preg_replace($pattern,'',$report);

$R = array("<firstname>"=>"Ian",
		"<surname>"=>"Young",
		"…"=>" ... ",
		"“"=>'"',
		"”"=>'"',
		"–"=>"-",
		"’"=>"'",
		"‘"=>"'",
		'"?'=>'?"',
		' style="{ margin-left: 0 }'=>"",
		"background: #000000;" => "background: #000000;color:#fff",
		"<br>"=>"",
		" "=>" ",
		"<p></p>"=>" ",
		"<p ></p>"=>" "
		);
$report = str_replace(array_keys($R),$R,$report);
?>

Link to comment
Share on other sites

$trade is a trademark symbol included from a properly encoded template file.

thanks for pointing out it is now missing - it was in the original class before i started bug checking etc. and no, its never been replaced successfully from within the class, along with all the other special characters.

 

i'm pretty sure it has got something to do with the presumed or actual encoding of an included file when included by a class.

 

everything else about the class works perfectly - if i want to preview the pdf, it outputs to the browser, if i want to save the pdf, it saves    it within the filesystem, and all the other preg_replacements work perfectly. the ONLY thing that doesn't work is these special character preg_replacements.

Link to comment
Share on other sites

An include only works for the scope that it's included in, and anything outside that scope can't access it, or inside it's own scope (i.e. function or class method) cannot access it.

 

i.e.

include("filename.php");

function myfunc(){
  echo $trade; // $trade is in filename.php // and WILL work.
}

class myclass {
  function func1(){
    include("filename.php"); // This include is ONLY in the scope of this function!
  }

  function func2(){
    echo $trade; // This WON'T work, because $trade doesn't exist in the scope.
  }
}

class my2ndclass {
  function func1(){
    include("filename.php");
    echo $trade; // this WILL work.
  }
}

 

Hope that clears something up.

Link to comment
Share on other sites

yes i know, the fix function actually starts like this:

 

<?php
function fix_report($report) {

	$trade = $this->get_template("trade");

	$pattern = array('/ò/','/ñ/','/™/');
	$replace = array("down","drive",$trade);
	$report = preg_replace($pattern,$replace,$report);
// etc.

?>

 

essentially the regex's '/ò/', '/ñ/' and '/™/' are not being found or replaced with preg_replace when used within the class, but are being found and replaced when outside. why would this be? all the other preg_replaces work, so it has to be something to do with the encoding.

Link to comment
Share on other sites

aschk is right, it is probably that $trade variable or maybe the $report variable. When in doubt, echo it out. Try adding a statement to echo everything out before the preg_replace

function fix_report($report) {

    $pattern = array('/ò/','/ñ/','/™/');
    $replace = array("down","drive",$trade);
    // debug
    print_r($pattern); print_r($replace); echo $report; 
    $report = preg_replace($pattern,$replace,$report);

Link to comment
Share on other sites

This thread is more than a year old. Please don't revive it unless you have something important to add.

Join the conversation

You can post now and register later. If you have an account, sign in now to post with your account.

Guest
Reply to this topic...

×   Pasted as rich text.   Restore formatting

  Only 75 emoji are allowed.

×   Your link has been automatically embedded.   Display as a link instead

×   Your previous content has been restored.   Clear editor

×   You cannot paste images directly. Upload or insert images from URL.

×
×
  • Create New...

Important Information

We have placed cookies on your device to help make this website better. You can adjust your cookie settings, otherwise we'll assume you're okay to continue.