findshorty Posted February 20, 2008 Share Posted February 20, 2008 I've built a dynamic PDF creation script that removes all the Microsoft guff from batch-converted doc files (to HTML), and replaces various special characters that aren't supported in PDF. The class does the following: (1) it is passed the filename of the file to be cleaned up and output; (2) the file is retrieved into a string using either file_get_contents or an output buffer (the results are the same in either case); (3) the string is passed to a "fix" function that performs all the various cleanup operations on the string and returns the fixed string; (4) the string is then output (or saved) as a PDF using dompdf (working fine; the problem has nothing to do with dompdf). Now - all the preg_replace calls that I've used for special characters work perfectly well outside of my class, for example to remove all incorrectly encoded trademark symbols, but once inside the class, a few of them fail. The preg_replace calls are absolutely identical outside and inside the class, so I'm completely baffled as to what is going on. <?php // other stuff // ... // ... // include the file to be fixed ob_start(); include "report_".$fileid.".html"; $report = ob_get_contents(); ob_end_clean(); // this is the absolutely vital preg_replace that works fine outside, but doesn't work inside the class $pattern = array('/ò/','/ñ/','/™/'); $replace = array("down","drive",$trade); $report = preg_replace($pattern,$replace,$report); // more preg_replacements, some of them fail inside the class again // return the fixed report // or output to dompdf if outside of the class return $report; ?> Are included strings encoded differently when inside a class?? Surely not, but it's all I can think of. Any help or opinions most appreciated! Cheers Quote Link to comment Share on other sites More sharing options...
aschk Posted February 20, 2008 Share Posted February 20, 2008 We can't see what you're doing in your class so it's rather difficult to make any judgements. Include the class file and we'll have another look. Quote Link to comment Share on other sites More sharing options...
rhodesa Posted February 20, 2008 Share Posted February 20, 2008 Yeah...don't show us what works, show us how you are using it inside the class. Quote Link to comment Share on other sites More sharing options...
findshorty Posted February 20, 2008 Author Share Posted February 20, 2008 many of the preg_replace's inside the fix_report function fail, however they work when outside of the class, which is very frustrating and for me, a little bit wierd. maybe some fresh eyes might see what the problem is! <?php class reportbuilder { function build($using) { global $IDQ,$CFG; // get the 3 required variables $this->id_score = $using["id_score"]; $this->first_name = $using["first_name"]; $this->surname = $using["surname"]; $this->IsHTML = $using["inhtml"]; // new filename $this->filename = $this->first_name."_".$this->surname."_".$this->id_score."_I.D.Report.pdf"; // and return false if any of them are empty if(!$this->id_score||!$this->first_name||!$this->surname) return false; // start the output string $this->output = array(); // get the various generic templates $template_arr = array("header","coverpage","idchart","howto","welcome","needs","talents","vulnerabilities","notes","footer"); foreach($template_arr as $f) $this->output[$f] = $this->get_template($f) or die("could not find template $f"); // get the report itself $report_orig = $this->get_report($this->id_score); $report = $this->fix_report($report_orig); $report_item = array("report"=>$report); // splice the report into the output array array_splice($this->output,5,0,$report_item); // and now replace all id_scores, first_names and surnames $rem = array("%%first_name%%","%%surname%%","%%id%%"); $rep = array($this->first_name,$this->surname,$this->id_score); foreach($this->output as $key=>$out) $this->output[$key] = str_replace($rem,$rep,$out); } function validate($using) { $valid = true; $req = array("first_name","surname","id_score","id_cdate"); if($using["ms"]) $req[] = "gender"; foreach($req as $field) { if(!$using[$field]) { $valid = false; break; } } return $valid; } function output($ispdf=false,$stream=false) { $this->html = ""; foreach($this->output as $out) $this->html .= $out; if(!$ispdf) { echo $this->html; } 
else { require_once(dirname(dirname(__FILE__))."/dompdf/dompdf_config.inc.php"); $this->dompdf = new DOMPDF(); $this->dompdf->load_html($this->html); $this->dompdf->render(); if(!$stream) { $this->filepath = dirname(dirname(__FILE__))."/idq/temp/".$this->filename; return file_put_contents($this->filepath,$this->dompdf->output()); } else { $this->stream(); } } } function stream() { $this->dompdf->stream($this->filename); } function get_template($tpl) { if(!file_exists(dirname(dirname(__FILE__))."/idq/generic_templates/$tpl.tpl.php")) return false; ob_start(); include dirname(dirname(__FILE__))."/idq/generic_templates/$tpl.tpl.php"; $str = ob_get_contents(); ob_end_clean(); return $str; } function get_report($id_score) { if(!file_exists(dirname(dirname(__FILE__))."/idq/report_templates/ID_Report_".$id_score.".html")) return false; ob_start(); include dirname(dirname(__FILE__))."/idq/report_templates/ID_Report_".$id_score.".html"; $str = ob_get_contents(); ob_end_clean(); return $str; } function fix_report($report) { $pattern = array('/ò/','/ñ/','/™/'); $replace = array("down","drive",$trade); $report = preg_replace($pattern,$replace,$report); // first, remove all empty tags $report = preg_replace('#<(\w+)[^>]*>\s*</\1>#im',' ', $report); // remove the style sheet $report = preg_replace('/<style [^>]*>(.*)<\/style>/s','',$report); // replace the header portion of the report $pattern_arr = array('/<p [^>]*><span [^>]*>(.*)(Personalised I.D. 
Report for)(.*)<\/span><\/p>/', '/<p [^>]*><span [^>]*>(.*)(<firstname> <surname>)(.*)<\/span><\/p>/', '/<p [^>]*><span [^>]*>(.*)(\d{4,4})(.*)<\/span><\/p>/'); $replace_arr = array("<div style='border-top:solid 3px #000;border-bottom:solid 3px #000;padding:2px;font-size:1.2em;'> <div style='border-top:solid 1px #000;border-bottom:solid 1px #000; padding:20px 0;text-align:center'> <i>$2</i><br/>", "<b>$2<br/>", "$id</b></div></div>"); $report = preg_replace($pattern_arr,$replace_arr,$report); // make headers $pattern = '/<p[^>]*>.*?(YOUR NEEDS|NATURAL TALENTS|NATURAL VULNERABILITIES|RECOMMENDATIONS|THE NEXT FEW PAGES|YOUR FUTURE DIRECTION).*?<\/p>/'; $report = preg_replace($pattern,"<h1>$1</h1>",$report); // now we preg_replace all the "Relevant Strategies" $pattern = '/<p [^>]*><span [^>]*>(Relevant Strategies)<\/span><\/p>/s'; $report = preg_replace($pattern,'<div class="strategy"><h3>$1</h3>',$report); $pattern = '/<ol [^>]*>(.*?)<\/ol>/isx'; $report = preg_replace($pattern,'<ol>$1</ol></div>',$report); // and "Relevant Strategy" $pattern = '/<p [^>]*><span [^>]*>(Relevant Strategy)<\/span><\/p>(.*?)<p [^>]*>(.*?)<\/p>/s'; $report = preg_replace($pattern,'<div class="strategy"><h3>$1</h3><p>$3</p></div>',$report); // replace all ul-li-/li-/ul combinations with h2 //<ul>\s*<li>(??!<li>).)*?<\/li>\s*<\/ul> $pattern = '/<ul[^>]*>\s*<li[^>]*>((??!<li[^>]*>).){1,128})<\/li>\s*<\/ul>/'; $report = preg_replace($pattern,"<h2>$1</h2>",$report); // replace single child <li> with a top border $pattern = '/<ul[^>]*>\s*<li[^>]*>((??!<li[^>]*>).)*?)<\/li>\s*<\/ul>/s'; $report = preg_replace($pattern,"<p style='border-top:dotted 1px #000;padding-top:10px'>$1</p>",$report); // now remove all span tags $report = preg_replace('/<[\/]?span[^>]*>/','',$report); // remove all classes $report = preg_replace('/ class=rv[^>]*/','',$report); // and remove all empty tags again $report = preg_replace('#<(\w+)[^>]*>\s*</\1>#im',' ', $report); //$vaci = 
array("Verify","Authenticate","Complete","Improvise"); $pattern = array('/(drive) (Verify|Authenticate|Complete|Improvise)/','/(down) (Verify|Authenticate|Complete|Improvise)/','/™/'); $replace = array(" <b><font face='ZapfDingbats'>s</font> $2</b> "," <b><font face='ZapfDingbats'>t</font> $2</b> "); $report = preg_replace($pattern,$replace,$report); // now remove all page breaks from the report $pattern = '/style="page-break[^>]*>/'; $report = preg_replace($pattern,'',$report); // finally remove the body and head tags // which will also remove the title and meta tags $pattern = '/<head>.*<\/head>|<\/?html>|<\/?body>|<!DOCTYPE[^>]*>/s'; $report = preg_replace($pattern,'',$report); $R = array("<firstname>"=>"Ian", "<surname>"=>"Young", "…"=>" ... ", "“"=>'"', "”"=>'"', "–"=>"-", "’"=>"'", "‘"=>"'", '"?'=>'?"', ' style="{ margin-left: 0 }'=>"", "background: #000000;" => "background: #000000;color:#fff", "<br>"=>"", " "=>" ", "<p></p>"=>" ", "<p ></p>"=>" " ); $report = str_replace(array_keys($R),$R,$report); // and return it return $report; } } ?> the following works perfectly <?php ob_start(); include "report_templates/ID_Report_".$_GET["id"].".html"; $report = ob_get_contents(); ob_end_clean(); //$report = file_get_contents("report_templates/ID_Report_2387.html"); //var_dump(strpos($report,'ñ')); $pattern = array('/ò/','/ñ/','/™/'); $replace = array("down","drive",$trade); $report = preg_replace($pattern,$replace,$report); // first, remove all empty tags $report = preg_replace('#<(\w+)[^>]*>\s*</\1>#im',' ', $report); // remove the style sheet $report = preg_replace('/<style [^>]*>(.*)<\/style>/s','',$report); // replace the header portion of the report $pattern_arr = array('/<p [^>]*><span [^>]*>(.*)(Personalised I.D. 
Report for)(.*)<\/span><\/p>/', '/<p [^>]*><span [^>]*>(.*)(<firstname> <surname>)(.*)<\/span><\/p>/', '/<p [^>]*><span [^>]*>(.*)(\d{4,4})(.*)<\/span><\/p>/'); $replace_arr = array("<div style='border-top:solid 3px #000;border-bottom:solid 3px #000;padding:2px;font-size:1.2em;'> <div style='border-top:solid 1px #000;border-bottom:solid 1px #000; padding:20px 0;text-align:center'> <i>$2</i><br/>", "<b>$2<br/>", "$id</b></div></div>"); $report = preg_replace($pattern_arr,$replace_arr,$report); // make headers $pattern = '/<p[^>]*>.*?(YOUR NEEDS|NATURAL TALENTS|NATURAL VULNERABILITIES|RECOMMENDATIONS|THE NEXT FEW PAGES|YOUR FUTURE DIRECTION).*?<\/p>/'; $report = preg_replace($pattern,"<h1>$1</h1>",$report); // now we preg_replace all the "Relevant Strategies" $pattern = '/<p [^>]*><span [^>]*>(Relevant Strategies)<\/span><\/p>/s'; $report = preg_replace($pattern,'<div class="strategy"><h3>$1</h3>',$report); $pattern = '/<ol [^>]*>(.*?)<\/ol>/isx'; $report = preg_replace($pattern,'<ol>$1</ol></div>',$report); // and "Relevant Strategy" $pattern = '/<p [^>]*><span [^>]*>(Relevant Strategy)<\/span><\/p>(.*?)<p [^>]*>(.*?)<\/p>/s'; $report = preg_replace($pattern,'<div class="strategy"><h3>$1</h3><p>$3</p></div>',$report); // replace all ul-li-/li-/ul combinations with h2 //<ul>\s*<li>(??!<li>).)*?<\/li>\s*<\/ul> $pattern = '/<ul[^>]*>\s*<li[^>]*>((??!<li[^>]*>).){1,128})<\/li>\s*<\/ul>/'; $report = preg_replace($pattern,"<h2>$1</h2>",$report); // replace single child <li> with a top border $pattern = '/<ul[^>]*>\s*<li[^>]*>((??!<li[^>]*>).)*?)<\/li>\s*<\/ul>/s'; $report = preg_replace($pattern,"<p style='border-top:dotted 1px #000;padding-top:10px'>$1</p>",$report); // now remove all span tags $report = preg_replace('/<[\/]?span[^>]*>/','',$report); // remove all classes $report = preg_replace('/ class=rv[^>]*/','',$report); // and remove all empty tags again $report = preg_replace('#<(\w+)[^>]*>\s*</\1>#im',' ', $report); //$vaci = 
array("Verify","Authenticate","Complete","Improvise"); $pattern = array('/(drive) (Verify|Authenticate|Complete|Improvise)/','/(down) (Verify|Authenticate|Complete|Improvise)/','/™/'); $replace = array(" <b><font face='ZapfDingbats'>s</font> $2</b> "," <b><font face='ZapfDingbats'>t</font> $2</b> "); $report = preg_replace($pattern,$replace,$report); // now remove all page breaks from the report $pattern = '/style="page-break[^>]*>/'; $report = preg_replace($pattern,'',$report); // finally remove the body and head tags // which will also remove the title and meta tags $pattern = '/<head>.*<\/head>|<\/?html>|<\/?body>|<!DOCTYPE[^>]*>/s'; $report = preg_replace($pattern,'',$report); $R = array("<firstname>"=>"Ian", "<surname>"=>"Young", "…"=>" ... ", "“"=>'"', "”"=>'"', "–"=>"-", "’"=>"'", "‘"=>"'", '"?'=>'?"', ' style="{ margin-left: 0 }'=>"", "background: #000000;" => "background: #000000;color:#fff", "<br>"=>"", " "=>" ", "<p></p>"=>" ", "<p ></p>"=>" " ); $report = str_replace(array_keys($R),$R,$report); ?> Quote Link to comment Share on other sites More sharing options...
aschk Posted February 21, 2008 Share Posted February 21, 2008 The first thing that jumps out at me is the following: $replace = array("down","drive",$trade); Inside the scope of your class function, the variable $trade doesn't exist. So I've no idea what behaviour this will cause, but I'd start with that. Quote Link to comment Share on other sites More sharing options...
findshorty Posted February 21, 2008 Author Share Posted February 21, 2008 $trade is a trademark symbol included from a properly encoded template file. Thanks for pointing out it is now missing - it was in the original class before I started bug checking etc. And no, it's never been replaced successfully from within the class, along with all the other special characters. I'm pretty sure it has got something to do with the presumed or actual encoding of an included file when included by a class. Everything else about the class works perfectly - if I want to preview the PDF, it outputs to the browser, if I want to save the PDF, it saves it within the filesystem, and all the other preg_replacements work perfectly. The ONLY thing that doesn't work is these special character preg_replacements. Quote Link to comment Share on other sites More sharing options...
aschk Posted February 21, 2008 Share Posted February 21, 2008 An include only takes effect in the scope where it is included: anything outside that scope can't access what it defines, and code inside its own separate scope (i.e. another function or class method) can't access it either. For example: include("filename.php"); function myfunc(){ echo $trade; // $trade is in filename.php // and WILL work. } class myclass { function func1(){ include("filename.php"); // This include is ONLY in the scope of this function! } function func2(){ echo $trade; // This WON'T work, because $trade doesn't exist in the scope. } } class my2ndclass { function func1(){ include("filename.php"); echo $trade; // this WILL work. } } Hope that clears something up. Quote Link to comment Share on other sites More sharing options...
findshorty Posted February 21, 2008 Author Share Posted February 21, 2008 Yes, I know; the fix function actually starts like this: <?php function fix_report($report) { $trade = $this->get_template("trade"); $pattern = array('/ò/','/ñ/','/™/'); $replace = array("down","drive",$trade); $report = preg_replace($pattern,$replace,$report); // etc. ?> Essentially, the regexes '/ò/', '/ñ/' and '/™/' are not being found or replaced with preg_replace when used within the class, but are being found and replaced when outside. Why would this be? All the other preg_replace calls work, so it has to be something to do with the encoding. Quote Link to comment Share on other sites More sharing options...
mainewoods Posted February 25, 2008 Share Posted February 25, 2008 aschk is right, it is probably that $trade variable or maybe the $report variable. When in doubt, echo it out. Try adding a statement to echo everything out before the preg_replace function fix_report($report) { $pattern = array('/ò/','/ñ/','/™/'); $replace = array("down","drive",$trade); // debug print_r($pattern); print_r($replace); echo $report; $report = preg_replace($pattern,$replace,$report); Quote Link to comment Share on other sites More sharing options...
Recommended Posts
Join the conversation
You can post now and register later. If you have an account, sign in now to post with your account.