// secweb Posted September 15, 2015 Share Posted September 15, 2015
// I'm using the following function to test for initial variable sanity, how does it fare?:

/**
 * Report whether $data passes the validation rule named by $type.
 *
 * Supported types:
 *   email    - accepted by filter_var(FILTER_VALIDATE_EMAIL)
 *   pass     - one or more of: word characters, and # : space ! £ $ .
 *   name     - one or more word characters (letters, digits, underscore)
 *   names    - word characters plus comma, space and tab (name list)
 *   filename - word characters plus hyphen and period
 *   words    - single-line text from a fixed punctuation/alphanumeric set
 *   text     - not implemented yet: every value is rejected
 *   title    - like "words" but without tab
 *   digits   - an int, or a string made only of the digits 0-9
 *   int      - integer accepted by FILTER_VALIDATE_INT, leading zeros allowed
 *   dec      - anything is_numeric() accepts
 *   hex      - string (or int, via its decimal form) of hexadecimal digits
 *
 * All patterns are anchored with \A ... \z so that a trailing newline is not
 * silently accepted (which "$" would allow).
 *
 * @param mixed  $data Value to check. Arrays, objects, resources and null
 *                     are always rejected.
 * @param string $type One of the type names listed above; anything else
 *                     (including non-strings) is rejected.
 * @return bool true when $data is valid for $type, false otherwise.
 */
function rx_valid_data($data,$type){
    // The special regular expression characters are: . \ + * ? [ ^ ] $ ( ) { } = ! < > | : -

    // Reject anything that cannot be meaningfully matched as text; this
    // keeps arrays/objects away from preg_match() and the ctype functions.
    if (!is_string($type) || !is_scalar($data)) {
        return false;
    }

    switch ($type) {
        case 'email':
            return filter_var($data, FILTER_VALIDATE_EMAIL) !== false;

        case 'pass':
            // has only chars & digits and #: !£$._
            // NOTE(review): without the "u" modifier the "£" is matched
            // byte-wise; confirm the source/input encoding before adding it.
            // Ideas not enforced yet: at least one letter (/[a-z]/i) and at
            // least one digit (/\d/).
            return preg_match('/\A[:# !£$\.\w]+\z/i', (string) $data) === 1;

        case 'name':
            // has only chars & digits and underscores
            return preg_match('/\A[\w]+\z/i', (string) $data) === 1;

        case 'names':
            // NAME LIST: has only chars & digits, underscores, comma, space, tab
            return preg_match('/\A[, \t\w]+\z/i', (string) $data) === 1;

        case 'filename':
            // has only chars & digits, underscores, hyphens and periods
            return preg_match('/\A[-\w\.]+\z/i', (string) $data) === 1;

        case 'words':
            // Single-line text. The earlier pattern carried "\s" and the "m"
            // modifier: "m" let any input through as long as ONE of its lines
            // was clean (e.g. "bad~line\nok"), and "\s" admitted newlines even
            // though the unit test below expects "my\nname" to be rejected.
            return preg_match('/\A[\._ \'\"\!£\$%&\*\(\)\+\-\<\>\|\:;\?\=\[\]\{\}\^\\/\tA-Za-z\d]+\z/i', (string) $data) === 1;

        case 'text':
            // No rule has been written for free text yet, so this branch
            // rejects every value (it does NOT let everything through).
            return false;

        case 'title':
            return preg_match('/\A[\._ \!£\$%&\*\(\)\+\-\<\>\|\:;\?\=\[\]\{\}\^\\/\'\"a-z\d]+\z/i', (string) $data) === 1;

        case 'digits':
            // is only digits, 0, 123, etc...
            // Logical "||" (not bitwise "|") so ctype_digit() never sees an int.
            return is_int($data) || (is_string($data) && ctype_digit($data));

        case 'int':
            if (is_string($data)) {
                // FILTER_VALIDATE_INT rejects "0000002"; drop redundant leading
                // zeros (keeping any sign and one final digit) before validating.
                $data = preg_replace('/\A(\s*[+-]?)0+(?=\d)/', '$1', $data);
            }
            // Compare against false: a valid "0" yields int 0, which is falsy.
            return filter_var($data, FILTER_VALIDATE_INT) !== false;

        case 'dec':
            return is_numeric($data);

        case 'hex':
            // is hex; ints are judged by their decimal string form rather than
            // being passed to ctype_xdigit() directly.
            return (is_string($data) || is_int($data)) && ctype_xdigit((string) $data);
    }

    return false;
}

// Here's a sort of unit test. The int one fails with leading zeros, the email tests are from the filter_var() php.net page, are they ok to ignore?:
// (The leading-zero failure is addressed in the "int" branch of rx_valid_data() above.)

// BL is appended to every message but is not defined anywhere in this snippet.
// NOTE(review): presumably a line-break string defined by the surrounding
// project; PHP_EOL is only a fallback so the snippet runs on its own.
if (!defined('BL')) {
    define('BL', PHP_EOL);
}

echo "Initialised".BL;

$err_tot=0;

unit_test_funcs_01();

echo "FUNCS TOTAL ERRORS: ".$err_tot.BL;

/**
 * Run the pass/fail samples for every validation type through do_test().
 *
 * Failures are reported by err_report(), which also counts them in the
 * global $err_tot.
 */
function unit_test_funcs_01(){

    echo "TESTING: rx_valid_data".BL;

    //rx_valid_data($data,$type) // type: email,pass,name,names,filename,words,text,title,digits,int,dec,hex

    /*
    'localpart.ending.with.dot.@example.com',
    '(comment)localpart@example.com',
    '"this is v@lid!"@example.com',
    '"much.more unusual"@example.com',
    'postbox@com',
    'admin@mailserver1',
    '"()<>[]:,;@\\"\\\\!#$%&\'*+-/=?^_`{}| ~.a"@example.org',
    '" "@example.org'
    */

    // PASS TEST: email
    // NOTE(review): FILTER_VALIDATE_EMAIL is documented as not supporting
    // dotless domains ('postbox@com', 'admin@mailserver1') and needs
    // FILTER_FLAG_EMAIL_UNICODE for non-ASCII local parts, so several of
    // these RFC edge cases are expected to be reported as PASS TEST errors.
    $a_pass=array("a@b.com","a@b.cow","уникум@из.рф",'localpart.ending.with.dot.@example.com','postbox@com','admin@mailserver1','"()<>[]:,;@\\"\\\\!#$%&\'*+-/=?^_`{}| ~.a"@example.org','" "@example.org'); // ,'(comment)localpart@example.com','"this is v@lid!"@example.com','"much.more unusual"@example.com'
    // NOTE(review): "^" is a legal character in an RFC local part, so
    // "a^@b.com" may validate - confirm before relying on it as a fail case.
    $a_fail=array("ab.com","a^@b.com");
    do_test("email",$a_pass,$a_fail);

    // TEST: pass
    // has only chars & digits and #: !£$._
    $a_pass=array("password","123","abc_def","#: !£\$a0");
    $a_fail=array("pass*word","","hkjhkl%jh","hkjh^");
    do_test("pass",$a_pass,$a_fail);

    // TEST: name
    // has only chars & digits and underscores
    $a_pass=array("name","name_0","abc_def");
    $a_fail=array("my*name","","hkjhkl%jh","hkjh^","name\n");
    do_test("name",$a_pass,$a_fail);

    // TEST: names
    // has only chars & digits, underscores, comma, space, tab
    // NOTE(review): the last sample appeared as a second "name name" in the
    // post; presumably it was a tab-separated pair, so it is written as one.
    $a_pass=array("name","name_0, name_1","name name","name,name","name\tname");
    $a_fail=array("my*name","","hkjhkl%jh","hkjh^");
    do_test("names",$a_pass,$a_fail);

    // TEST: filename
    // has only chars & digits, underscores, hyphens and periods
    $a_pass=array("name.xyz","name_0","test0.jpg");
    $a_fail=array("my*name","","hkjhkl%jh","hkjh^");
    do_test("filename",$a_pass,$a_fail);

    // TEST: words
    // "bad~line\nok" guards against the multi-line bypass described in
    // rx_valid_data().
    $a_pass=array("name.xyz","name_0","test0.jpg");
    $a_fail=array("my\nname","","bad~line\nok");
    do_test("words",$a_pass,$a_fail);

    /*
    // TEST: text
    //
    $a_pass=array("name.xyz","name_0","test0.jpg");
    $a_fail=array("my\nname","");
    do_test("text",$a_pass,$a_fail);
    */

    // TEST: title
    //
    $a_pass=array("name.xyz","cow's","test0.jpg");
    $a_fail=array("my\nname","");
    do_test("title",$a_pass,$a_fail);

    // TEST: digits
    $a_pass=array("1","0000002");
    $a_fail=array("name","","0.1");
    do_test("digits",$a_pass,$a_fail);

    // TEST: int
    $a_pass=array("1","0","0000002","-33");
    $a_fail=array("name","","0..1","0.1");
    do_test("int",$a_pass,$a_fail);

    // TEST: dec
    $a_pass=array("1","000000.2","-33");
    $a_fail=array("name","","0..1");
    do_test("dec",$a_pass,$a_fail);

    // TEST: hex
    $a_pass=array("abc","ff00ff","A2E");
    $a_fail=array("my_name","","g0");
    do_test("hex",$a_pass,$a_fail);

}

/**
 * Check one validation type against samples that must pass and must fail.
 *
 * @param string $type   Validation type handed to rx_valid_data().
 * @param array  $a_pass Values rx_valid_data() is expected to accept.
 * @param array  $a_fail Values rx_valid_data() is expected to reject.
 */
function do_test($type,$a_pass,$a_fail){

    // PASS TEST: every sample must be accepted
    foreach($a_pass as $n => $e){
        if(!rx_valid_data($e,$type)){
            err_report("rx_valid_data() PASS TEST",$type.":".$n.": ".$e);
        }
    }

    // FAIL TEST: every sample must be rejected
    foreach($a_fail as $n => $e){
        if(rx_valid_data($e,$type)){
            err_report("rx_valid_data() FAIL TEST",$type.":".$n.": ".$e);
        }
    }

}

/**
 * Print one test failure and add it to the global error counter.
 *
 * @param string $s   Description of the failed check.
 * @param string $ref Reference identifying the sample (type:index: value).
 */
function err_report($s,$ref){
    global $err_tot;
    $err_tot++;
    echo "FUNCS error(".$ref."): ".$s.BL;
}

// Quote Link to comment Share on other sites More sharing options...
Jacques1 Posted September 16, 2015 Share Posted September 16, 2015 Don't stuff all validations into a single function. Make one function for each check, e.g. valid_email_address() for e-mail validation. Some checks are rather sloppy. For example, your “names” pattern permits strings consisting only of commas or whitespace, which is probably not what you want. A more reasonable pattern would be something like '/\\A[ \\t]*\\w+[ \\t]*(,[ \\t]*\\w+[ \\t]*)*\\z/' This checks for an actual comma-separated list of words, optionally surrounded by whitespace. Filenames shouldn't start with a dot, because this is interpreted as a hidden file. A single dot or double dots also have a special meaning. So I'd rather use '/\\A\\w+([.]\\w+)*\\z/' Why do you restrict the characters allowed in a password? This massively reduces the password strength for each given length, because there simply aren't many combinations to choose from. It's also extremely annoying for people who generate random passwords with a password manager, because they have to change their settings in order to comply with your policy. Passwords should have no restrictions at all or only very basic restrictions like “The word must be printable”. Artificially reducing the input alphabet is a bad idea. Quote Link to comment Share on other sites More sharing options...
secweb Posted September 17, 2015 Author Share Posted September 17, 2015 For my purposes hidden files are fine and allowed; also, dot and double dot are handled elsewhere (and in my file manager are actually used). I like your name list, I will really look into that, but other checks are made when parsing the potential list, which filter out empties and such. I'm hearing you on the passwords and will change it. All the tests are in a single function because this is part of a larger chain (form class, custom $_REQUEST wrapper), so in many cases the logic of deciding which test to run would be replicated elsewhere. I do however have the intent to change the strings to some form of enum, either via a class with constants or using defines (messy). Thank you Quote Link to comment Share on other sites More sharing options...
Recommended Posts
Join the conversation
You can post now and register later. If you have an account, sign in now to post with your account.