In the end I figured out a way to do this myself, and ended up with something which does a few other neat things, so I thought I'd share the code here.
This code will do the following:-
- replace any email address in a post with a spambot proof version of the email address
- turn any web address beginning with http:// or www. into a proper hyperlink
- for text written in a word processor and pasted into MT, replace curly quotes with proper HTML
- allow insertion of images in an article without html
To use, you need to add the php code at the bottom of this post to the top of your template file(s), and then insert a line of php code immediately before text you want processed, and another line immediately after. For example, in my case I did the following:
CODE
<?php ob_start("parse");?>
<$MTEntryBody$>
<?php ob_end_flush();?>
which means that all the text in the MT article body is processed. You can put any HTML or MT tags in place of <$MTEntryBody$>, between the ob_start() line and the ob_end_flush() line.
To use this code, you'll probably need to edit a few bits - particularly the lines at the end of the place_image function where the actual HTML that is output is defined.
Lastly, if you want a bit of documentation on using the insert image command, I put it online here.
Here's the php code:
CODE
<?php
function email ($a,$b="",$echotext=TRUE) {
    // Builds a spambot-resistant mailto link for an email address.
    // The address never appears literally in the page: every character of the user and
    // domain parts is written as an HTML numeric character reference (&#NNN;) and the
    // link is assembled in the browser by a small script using document.write().
    // Browsers with javascript turned off get a <noscript> link to /inc/contact.php
    // instead, which receives the encoded domain (d) and user (c) in its query string.
    // NOTE(review): /inc/contact.php is not part of this file - confirm that it decodes
    // the &#NNN; form passed in c and d.
    // $a = email address
    // $b = text shown as the link (if empty, the script writes the address itself);
    //      it is inserted into the page as-is, so it may contain HTML
    // $c = encoded user part, $d = encoded domain part (built below)
    // $echotext: if true, function echos output, otherwise returns output
    // Returns an error message string when $a is empty or has no "@" (returned, not
    // echoed, whatever $echotext says); otherwise the HTML when $echotext is false.
    // Testing: set the global $noscript, or pass noscript=1 in the url, to show the
    // non-javascript fallback inline instead of inside <noscript>.
    global $noscript;

    if ($a == "") return " <b>Error: email address not specified</b> ";
    $atpos = strpos($a, "@");
    if ($atpos === false) return " <b>Error:email address invalid</b> ";

    // with no display text the script itself writes the decoded address as the link text
    $bb = ($b == "") ? "document.write(b)" : "";

    // encode each character as &#NNN; - a bare run of decimal codes cannot be decoded
    // by the browser, so the delimiters are required for the link to work
    $user = substr($a, 0, $atpos);
    $c = "";
    for ($i = 0; $i < strlen($user); $i++) {
        $c .= '&#' . ord(substr($user, $i, 1)) . ';';
    }
    $domain = substr($a, $atpos + 1);
    $d = "";
    for ($i = 0; $i < strlen($domain); $i++) {
        $d .= '&#' . ord(substr($domain, $i, 1)) . ';';
    }

    // "mailto:" and the "@" are split across statements so the page source never
    // contains a harvestable address
    $output = <<<END
<script>
<!--
var a,b,c
a='<a href=\"mai'
b='$c'
c='\">'
a+='lto:'
b+='@'
b+='$d'
document.write(a+b+c)
$bb
-->
</script>
$b</a>
END;

    //direct user to pop up contact form if javascript not enabled
    $cc = urlencode($c);
    $dd = urlencode($d);
    $args = "d=$dd&c=$cc";
    if ($b == "") {
        $noscriptscript = "(click <a href=\"/inc/contact.php?$args&form=1\">here</a> to send message)";
    } else {
        $noscriptscript = "<a href=\"/inc/contact.php?$args&form=1\">$b</a>";
    }

    // register_globals no longer exists, so also honour noscript=1 read from the url
    $test_noscript = !empty($noscript) || !empty($_GET['noscript']);
    if ($test_noscript) {
        //for testing call with noscript=1
        $output .= "<b>-NOSCRIPT-</b>$noscriptscript<b>-/NOSCRIPT-</b>\n";
    } else {
        $output .= "<noscript>$noscriptscript</noscript>\n";
    }

    if ($echotext) {
        echo($output);
    } else {
        return($output);
    }
}
function convert_links ($text) {
    // Converts all occurrences of a typed-in URL in $text to proper hyperlinks.
    // Based on a script from the www.php.net function reference for preg_replace
    // http://www.php.net/manual/en/function.preg-replace.php
    // $text = HTML fragment to process
    // Returns $text with bare http://, https://, ftp:// and www. addresses wrapped in
    // <a href='...' target='link'>. For http:// links the protocol is dropped from the
    // visible link text.

    // do not touch an address that is already an attribute value (quoted or not)
    $not_anchor = '(?<!["\']|href=|href\s=\s|href=\s|href\s=)';
    // optional path/query; the last character may not be punctuation such as "." or ","
    // so that a sentence-ending full stop is not swallowed into the link
    $path = '(?:[\w\-\.;,@?^=%&:\/~\+#]*[\w\-\@?^=%&\/~\+#])?';

    // First match things beginning with http:// (or other protocols).
    // The dots between host labels are escaped so the match cannot run across a space.
    $protocol = '(?:https?|ftp):\/\/';
    $host = '[\w\-]+(?:\.[\w\-]+)*';
    $expr = '/' . $not_anchor . $protocol . $host . $path . '/i';
    $text = preg_replace($expr, "<a href='$0' target='link'>[remove_http]$0</a>", $text);

    // Now match things beginning with www.
    // The lookbehind rejects "www" in the middle of a word, email address or URL
    // (which also covers anything preceded by "://" from the first pass).
    $not_inside_token = '(?<![\w\/@.\-])';
    $host = 'www\.[\w\-]+';
    $expr = '/' . $not_anchor . $not_inside_token . $host . $path . '/i';
    $text = preg_replace($expr, "<a href='http://$0' target='link'>$0</a>", $text);

    // above will add <a href> to a web address already inside an <a> tag, so remove
    // double tags; non-greedy so that several links on one line are handled one by one
    $text = preg_replace('#<a href[^>]*>(<a href.*?</a>)</a>#i', "$1", $text);

    // remove the marker from the link text that displays on page, together with a
    // leading http:// (other protocols stay visible)
    $text = preg_replace('#\[remove_http\](?:http://)?#i', '', $text);

    return $text;
}
function place_image($command) {
    // Converts {image=[image filename] align=[left|right|center] width=[n] height=[n]
    // caption=[caption in quotes] [wrap] [border]} to the corresponding html.
    // $command = the whole {image ...} command; one level of backslash escaping is
    //            removed first, so callers pass it slash-escaped
    // Returns a <div class="entryimage"> holding the <img> and, if given, the caption.
    // Inside a quoted caption a double quote can be written as \"
    $command = stripslashes($command);

    //parse out individual parameters:
    //get image filename
    $image = "";
    if (preg_match('/image ?= ?("([^"])*"|([^\s}]*))/i', $command, $m)) {
        $image = trim($m[1], '"');
    }

    //get align, width and height (each must be preceded by whitespace)
    $opt = array('align' => "", 'width' => "", 'height' => "");
    foreach (array_keys($opt) as $name) {
        if (preg_match('/\s' . $name . ' ?= ?("([^"])*"|([^\s}]*))/i', $command, $m)) {
            $opt[$name] = trim($m[1], '"');
        }
    }
    $align = str_replace("centre", "center", $opt['align']);
    if ($align == "") $align = "left";
    $width = $opt['width'];
    $height = $opt['height'];

    //define width if not specified to ensure that caption wraps within image width
    if ($width == "" && $image != "") {
        $size = getimagesize($image);
        // the image may be missing or unreadable; then the width simply stays unset
        if (is_array($size) && $size[0] > 0 && $size[1] > 0) {
            if ($height != "" && is_numeric($height)) {
                // scale the natural width to the requested height
                $width = (int) round($size[0] * $height / $size[1]);
            } else {
                $width = $size[0];
            }
        }
    }

    //get caption: a quoted value ends at the first unescaped quote, so parameters
    //that follow the caption are not swallowed into it
    $caption = "";
    if (preg_match('/\scaption ?= ?("(?:[^"\\\\]|\\\\.)*"|[^\s}]*)/is', $command, $m)) {
        $raw = $m[1];
        if (strlen($raw) >= 2 && $raw[0] == '"' && substr($raw, -1) == '"') {
            $raw = substr($raw, 1, -1);
        } else {
            $raw = trim($raw, '"');
        }
        $caption = stripslashes($raw);
    }

    //remove anything in quotes, so we don't mistake anything in quotes as a command
    $flags = preg_replace(array('/\x5C"/', '/"[^"]*"/'), array('', '"XXX"'), $command);
    //the flags must stand alone, so "border.jpg" or "images/wrap-1.png" do not count
    $float = (bool) preg_match('/(?<![\w.\/=\-])wrap(?![\w.\/\-])/', $flags);
    $border = (bool) preg_match('/(?<![\w.\/=\-])border(?![\w.\/\-])/', $flags);
    if ($align == "center") $float = FALSE; //can't float if center aligned

    //modify styles and HTML output as appropriate for your layout
    $box_width = $width;
    $style = "text-align:$align;";
    if ($float) {
        // leave a gap on the side the text flows around
        $pad = ($align == "left") ? "right" : "left";
        $style .= "float:$align;margin-$pad:5px;";
    }
    if ($border) {
        $style .= "border: 1px dotted #DDD;padding:5px;";
        // make room for the 5px padding on both sides
        if (is_numeric($box_width)) $box_width = $box_width + 10;
    }
    // only emit a width when there is a usable number to put in front of "px"
    if (is_numeric($box_width)) $style .= "width:" . $box_width . "px;";

    $output = "<div class=\"entryimage\" align=center style=\"$style\">";
    $output .= "<img src=\"" . str_replace('"', '&quot;', $image) . "\" ";
    if ((string) $width !== "") $output .= "width=$width ";
    if ((string) $height !== "") $output .= "height=$height ";
    $output .= "border=0>";
    if ($caption <> "") {
        $caption_width = is_numeric($width) ? "width:{$width}px;" : "";
        $output .= "<br /><div style=\"text-align:center;$caption_width\"><i>$caption</i></div>";
    }
    $output .= "</div>";
    return($output);
}
function parse($buffer) {
    // Output-buffer callback (used as ob_start("parse")) that post-processes a chunk
    // of page text:
    //  - every email address is replaced by the output of email()
    //  - every {image ...} command is replaced by the output of place_image()
    //  - curly quotes (UTF-8) are replaced: single ones by a straight quote, double
    //    ones by the &ldquo; / &rdquo; entities
    //  - typed-in URLs are turned into links by convert_links()
    // $buffer = text to process
    // Returns the processed text.
    // Callbacks are used because the /e (eval) pattern modifier no longer exists.

    //replace emails with the output of the email() function
    $email_pattern = '/[_a-z0-9-]+(\.[_a-z0-9-]+)*@[a-z0-9-]+(\.[a-z0-9-]{2,})+/i';
    $result = preg_replace_callback($email_pattern, function ($m) {
        return email($m[0], '', FALSE);
    }, $buffer);
    // preg_replace_callback gives null on a regex error; keep the text rather than blank the page
    if ($result !== null) $buffer = $result;

    //replace {image} with IMG html code by calling place_image() function
    $result = preg_replace_callback('/\{image[^}]*\}/i', function ($m) {
        // place_image() strips one level of slashes (it was written for /e, which
        // escaped its matches), so escape here to hand it the text unchanged
        return place_image(addslashes($m[0]));
    }, $buffer);
    if ($result !== null) $buffer = $result;

    //replace curly single quotes with straight ones
    //(byte sequences are spelled out so this does not depend on the encoding this file is saved in)
    $buffer = str_replace(array("\xE2\x80\x98", "\xE2\x80\x99"), "'", $buffer);
    //replace curly double quotes with correct html code
    $buffer = str_replace(
        array("\xE2\x80\x9C", "\xE2\x80\x9D"),
        array('&ldquo;', '&rdquo;'),
        $buffer
    );

    //convert URL's to links
    $buffer = convert_links($buffer);
    return($buffer);
}
?>
function email ($a,$b="",$echotext=TRUE) {
    // Builds a spambot-resistant mailto link for an email address.
    // The address never appears literally in the page: every character of the user and
    // domain parts is written as an HTML numeric character reference (&#NNN;) and the
    // link is assembled in the browser by a small script using document.write().
    // Browsers with javascript turned off get a <noscript> link to /inc/contact.php
    // instead, which receives the encoded domain (d) and user (c) in its query string.
    // NOTE(review): /inc/contact.php is not part of this file - confirm that it decodes
    // the &#NNN; form passed in c and d.
    // $a = email address
    // $b = text shown as the link (if empty, the script writes the address itself);
    //      it is inserted into the page as-is, so it may contain HTML
    // $c = encoded user part, $d = encoded domain part (built below)
    // $echotext: if true, function echos output, otherwise returns output
    // Returns an error message string when $a is empty or has no "@" (returned, not
    // echoed, whatever $echotext says); otherwise the HTML when $echotext is false.
    // Testing: set the global $noscript, or pass noscript=1 in the url, to show the
    // non-javascript fallback inline instead of inside <noscript>.
    global $noscript;

    if ($a == "") return " <b>Error: email address not specified</b> ";
    $atpos = strpos($a, "@");
    if ($atpos === false) return " <b>Error:email address invalid</b> ";

    // with no display text the script itself writes the decoded address as the link text
    $bb = ($b == "") ? "document.write(b)" : "";

    // encode each character as &#NNN; - a bare run of decimal codes cannot be decoded
    // by the browser, so the delimiters are required for the link to work
    $user = substr($a, 0, $atpos);
    $c = "";
    for ($i = 0; $i < strlen($user); $i++) {
        $c .= '&#' . ord(substr($user, $i, 1)) . ';';
    }
    $domain = substr($a, $atpos + 1);
    $d = "";
    for ($i = 0; $i < strlen($domain); $i++) {
        $d .= '&#' . ord(substr($domain, $i, 1)) . ';';
    }

    // "mailto:" and the "@" are split across statements so the page source never
    // contains a harvestable address
    $output = <<<END
<script>
<!--
var a,b,c
a='<a href=\"mai'
b='$c'
c='\">'
a+='lto:'
b+='@'
b+='$d'
document.write(a+b+c)
$bb
-->
</script>
$b</a>
END;

    //direct user to pop up contact form if javascript not enabled
    $cc = urlencode($c);
    $dd = urlencode($d);
    $args = "d=$dd&c=$cc";
    if ($b == "") {
        $noscriptscript = "(click <a href=\"/inc/contact.php?$args&form=1\">here</a> to send message)";
    } else {
        $noscriptscript = "<a href=\"/inc/contact.php?$args&form=1\">$b</a>";
    }

    // register_globals no longer exists, so also honour noscript=1 read from the url
    $test_noscript = !empty($noscript) || !empty($_GET['noscript']);
    if ($test_noscript) {
        //for testing call with noscript=1
        $output .= "<b>-NOSCRIPT-</b>$noscriptscript<b>-/NOSCRIPT-</b>\n";
    } else {
        $output .= "<noscript>$noscriptscript</noscript>\n";
    }

    if ($echotext) {
        echo($output);
    } else {
        return($output);
    }
}
function convert_links ($text) {
    // Converts all occurrences of a typed-in URL in $text to proper hyperlinks.
    // Based on a script from the www.php.net function reference for preg_replace
    // http://www.php.net/manual/en/function.preg-replace.php
    // $text = HTML fragment to process
    // Returns $text with bare http://, https://, ftp:// and www. addresses wrapped in
    // <a href='...' target='link'>. For http:// links the protocol is dropped from the
    // visible link text.

    // do not touch an address that is already an attribute value (quoted or not)
    $not_anchor = '(?<!["\']|href=|href\s=\s|href=\s|href\s=)';
    // optional path/query; the last character may not be punctuation such as "." or ","
    // so that a sentence-ending full stop is not swallowed into the link
    $path = '(?:[\w\-\.;,@?^=%&:\/~\+#]*[\w\-\@?^=%&\/~\+#])?';

    // First match things beginning with http:// (or other protocols).
    // The dots between host labels are escaped so the match cannot run across a space.
    $protocol = '(?:https?|ftp):\/\/';
    $host = '[\w\-]+(?:\.[\w\-]+)*';
    $expr = '/' . $not_anchor . $protocol . $host . $path . '/i';
    $text = preg_replace($expr, "<a href='$0' target='link'>[remove_http]$0</a>", $text);

    // Now match things beginning with www.
    // The lookbehind rejects "www" in the middle of a word, email address or URL
    // (which also covers anything preceded by "://" from the first pass).
    $not_inside_token = '(?<![\w\/@.\-])';
    $host = 'www\.[\w\-]+';
    $expr = '/' . $not_anchor . $not_inside_token . $host . $path . '/i';
    $text = preg_replace($expr, "<a href='http://$0' target='link'>$0</a>", $text);

    // above will add <a href> to a web address already inside an <a> tag, so remove
    // double tags; non-greedy so that several links on one line are handled one by one
    $text = preg_replace('#<a href[^>]*>(<a href.*?</a>)</a>#i', "$1", $text);

    // remove the marker from the link text that displays on page, together with a
    // leading http:// (other protocols stay visible)
    $text = preg_replace('#\[remove_http\](?:http://)?#i', '', $text);

    return $text;
}
function place_image($command) {
    // Converts {image=[image filename] align=[left|right|center] width=[n] height=[n]
    // caption=[caption in quotes] [wrap] [border]} to the corresponding html.
    // $command = the whole {image ...} command; one level of backslash escaping is
    //            removed first, so callers pass it slash-escaped
    // Returns a <div class="entryimage"> holding the <img> and, if given, the caption.
    // Inside a quoted caption a double quote can be written as \"
    $command = stripslashes($command);

    //parse out individual parameters:
    //get image filename
    $image = "";
    if (preg_match('/image ?= ?("([^"])*"|([^\s}]*))/i', $command, $m)) {
        $image = trim($m[1], '"');
    }

    //get align, width and height (each must be preceded by whitespace)
    $opt = array('align' => "", 'width' => "", 'height' => "");
    foreach (array_keys($opt) as $name) {
        if (preg_match('/\s' . $name . ' ?= ?("([^"])*"|([^\s}]*))/i', $command, $m)) {
            $opt[$name] = trim($m[1], '"');
        }
    }
    $align = str_replace("centre", "center", $opt['align']);
    if ($align == "") $align = "left";
    $width = $opt['width'];
    $height = $opt['height'];

    //define width if not specified to ensure that caption wraps within image width
    if ($width == "" && $image != "") {
        $size = getimagesize($image);
        // the image may be missing or unreadable; then the width simply stays unset
        if (is_array($size) && $size[0] > 0 && $size[1] > 0) {
            if ($height != "" && is_numeric($height)) {
                // scale the natural width to the requested height
                $width = (int) round($size[0] * $height / $size[1]);
            } else {
                $width = $size[0];
            }
        }
    }

    //get caption: a quoted value ends at the first unescaped quote, so parameters
    //that follow the caption are not swallowed into it
    $caption = "";
    if (preg_match('/\scaption ?= ?("(?:[^"\\\\]|\\\\.)*"|[^\s}]*)/is', $command, $m)) {
        $raw = $m[1];
        if (strlen($raw) >= 2 && $raw[0] == '"' && substr($raw, -1) == '"') {
            $raw = substr($raw, 1, -1);
        } else {
            $raw = trim($raw, '"');
        }
        $caption = stripslashes($raw);
    }

    //remove anything in quotes, so we don't mistake anything in quotes as a command
    $flags = preg_replace(array('/\x5C"/', '/"[^"]*"/'), array('', '"XXX"'), $command);
    //the flags must stand alone, so "border.jpg" or "images/wrap-1.png" do not count
    $float = (bool) preg_match('/(?<![\w.\/=\-])wrap(?![\w.\/\-])/', $flags);
    $border = (bool) preg_match('/(?<![\w.\/=\-])border(?![\w.\/\-])/', $flags);
    if ($align == "center") $float = FALSE; //can't float if center aligned

    //modify styles and HTML output as appropriate for your layout
    $box_width = $width;
    $style = "text-align:$align;";
    if ($float) {
        // leave a gap on the side the text flows around
        $pad = ($align == "left") ? "right" : "left";
        $style .= "float:$align;margin-$pad:5px;";
    }
    if ($border) {
        $style .= "border: 1px dotted #DDD;padding:5px;";
        // make room for the 5px padding on both sides
        if (is_numeric($box_width)) $box_width = $box_width + 10;
    }
    // only emit a width when there is a usable number to put in front of "px"
    if (is_numeric($box_width)) $style .= "width:" . $box_width . "px;";

    $output = "<div class=\"entryimage\" align=center style=\"$style\">";
    $output .= "<img src=\"" . str_replace('"', '&quot;', $image) . "\" ";
    if ((string) $width !== "") $output .= "width=$width ";
    if ((string) $height !== "") $output .= "height=$height ";
    $output .= "border=0>";
    if ($caption <> "") {
        $caption_width = is_numeric($width) ? "width:{$width}px;" : "";
        $output .= "<br /><div style=\"text-align:center;$caption_width\"><i>$caption</i></div>";
    }
    $output .= "</div>";
    return($output);
}
function parse($buffer) {
    // Output-buffer callback (used as ob_start("parse")) that post-processes a chunk
    // of page text:
    //  - every email address is replaced by the output of email()
    //  - every {image ...} command is replaced by the output of place_image()
    //  - curly quotes (UTF-8) are replaced: single ones by a straight quote, double
    //    ones by the &ldquo; / &rdquo; entities
    //  - typed-in URLs are turned into links by convert_links()
    // $buffer = text to process
    // Returns the processed text.
    // Callbacks are used because the /e (eval) pattern modifier no longer exists.

    //replace emails with the output of the email() function
    $email_pattern = '/[_a-z0-9-]+(\.[_a-z0-9-]+)*@[a-z0-9-]+(\.[a-z0-9-]{2,})+/i';
    $result = preg_replace_callback($email_pattern, function ($m) {
        return email($m[0], '', FALSE);
    }, $buffer);
    // preg_replace_callback gives null on a regex error; keep the text rather than blank the page
    if ($result !== null) $buffer = $result;

    //replace {image} with IMG html code by calling place_image() function
    $result = preg_replace_callback('/\{image[^}]*\}/i', function ($m) {
        // place_image() strips one level of slashes (it was written for /e, which
        // escaped its matches), so escape here to hand it the text unchanged
        return place_image(addslashes($m[0]));
    }, $buffer);
    if ($result !== null) $buffer = $result;

    //replace curly single quotes with straight ones
    //(byte sequences are spelled out so this does not depend on the encoding this file is saved in)
    $buffer = str_replace(array("\xE2\x80\x98", "\xE2\x80\x99"), "'", $buffer);
    //replace curly double quotes with correct html code
    $buffer = str_replace(
        array("\xE2\x80\x9C", "\xE2\x80\x9D"),
        array('&ldquo;', '&rdquo;'),
        $buffer
    );

    //convert URL's to links
    $buffer = convert_links($buffer);
    return($buffer);
}
?>