Difference between revisions of "Oo-regexp-export.php"

From Organic Design wiki
Line 2: Line 2:
 
# Export article to OO format
 
# Export article to OO format
 
# - only does document.php transform and its own wiki markup
 
# - only does document.php transform and its own wiki markup
 +
 +
# Returns an OO-span element from a CSS-style-span
 +
function replace_span($matches) {
 +
$oo = '';
 +
$css = preg_replace("/\\s*([:;])\\s*/m",'$1',$matches[1]);
 +
foreach (explode(';', $css) as $style) {
 +
list($att, $val) = explode(':', $style);
 +
if ($att == 'color') $oo .= " fo:color=\"$val\"";
 +
if ($att == 'font-weight') $oo .= ' fo:font-weight="bold" style:font-weight-asian="bold" style:font-weight-complex="bold"';
 +
if ($att == 'font-style') $oo .= ' fo:font-style="italic" style:font-style-asian="italic" style:font-style-complex="italic"';
 +
}
 +
$oo = "<style:properties $oo/>";
 +
if (isset($GLOBALS['oo-styles'][$oo])) $tN = $GLOBALS['oo-styles'][$oo]; else {
 +
$tN = $GLOBALS['oo-styles'][$oo] = 5+count($GLOBALS['oo-styles']);
 +
$GLOBALS['oo-style-indexes'][$tN] = $oo;
 +
}
 +
return "<text:span text:style-name=\"T$tN\">";
 +
}
 +
 +
function replace_code($matches) {
 +
$text = '';
 +
foreach (preg_split('/<br *\\/>/', $matches[1]) as $line)
 +
$text .= "<text:p text:style-name=\"Code\">$line</text:p>\n";
 +
$text = preg_replace_callback('/<span style="(.*?)">/', 'replace_span', $text);
 +
$text = preg_replace('/<\\/span>/', '</text:span>', $text);
 +
return "</text:p>\n$text\n<text:p text:style-name=\"Text body\">\n";
 +
}
  
 
# Main wiki->oo parser
 
# Main wiki->oo parser
 
function ooParse($text) {
 
function ooParse($text) {
 
$script = $GLOBALS['xwScript'];
 
$script = $GLOBALS['xwScript'];
$bToken = ':-:'.'bullet'.':-:';
+
for ($i = 0; $i < 10; $i++) {
$hToken = ':-:'.'heading'.':-:';
+
$token = "token$i";
 
+
$$token = ":-:$token:-:";
 +
}
 
# ;text (pre)
 
# ;text (pre)
$text = preg_replace("/\\n;(.+?)\\n/m", "\n\n$hToken\\1\n\n", $text);
+
$text = preg_replace("/\\n;(.+?)\\n/m", "\n\n$token2\\1\n\n", $text);
 
 
 
# *text (pre)
 
# *text (pre)
$text = preg_replace("/\\n(\\*+) *(.+?)$/m", "$bToken\\1\\2$bToken", $text);
+
$text = preg_replace("/\\n(\\*+) *(.+?)$/m", "$token1\\1\\2$token1", $text);
 
+
# <pre> (pre)
 +
$text = preg_replace("/<pre>(.+?)<\\/pre>/es", '"\n\nPR$token3".htmlentities("$1")."\n\n"', $text);
 +
# no-wiki (pre)
 +
$text = preg_replace("/<[n]owiki>(.+?)<\\/nowiki>/s", "\n\nNW$token3$1\n\n", $text);
 +
# xwcode (pre)
 +
$text = preg_replace('/<!-- xwcode-start --><div class="xwcode">(.*?)<.div><!-- xwcode-end -->/s', "\n\nXW$token3$1\n\n", $text);
 
$xml = '';
 
$xml = '';
foreach (explode("\n\n", trim(htmlentities($text))) as $text) {
+
foreach (explode("\n\n", trim($text)) as $text) {
 
+
if (!ereg($token3, $text)) {
# '''text'''
+
$text = htmlentities($text);
$text = preg_replace("/'''(.+?)'''/s", "<text:span text:style-name=\"T2\">\\1</text:span>", $text);
+
# '''text'''
+
$text = preg_replace("/'''(.+?)'''/s", "<text:span text:style-name=\"T2\">\\1</text:span>", $text);
# ''text''
+
# ''text''
$text = preg_replace("/''(.+?)''/s", "<text:span text:style-name=\"T1\">\\1</text:span>", $text);
+
$text = preg_replace("/''(.+?)''/s", "<text:span text:style-name=\"T1\">\\1</text:span>", $text);
+
# [[Wikipedia:article|anchor]]
# [[Wikipedia:article|anchor]]
+
$text = preg_replace("/\\[\\[Wikipedia:([^\\|\\]]+)\\|([^\\]]*)]]/", "<text:a xlink:type=\"simple\" xlink:href=\"http://en.wikipedia.org/wiki/\\1\">\\2</text:a>", $text);
$text = preg_replace("/\\[\\[Wikipedia:([^\\|\\]]+)\\|([^\\]]*)]]/", "<text:a xlink:type=\"simple\" xlink:href=\"http://en.wikipedia.org/wiki/\\1\">\\2</text:a>", $text);
+
# [[article|anchor]]
+
$text = preg_replace("/\\[\\[([^\\|\\]]+)\\|([^\\]]*)]]/", "<text:a xlink:type=\"simple\" xlink:href=\"$script/\\1\">\\2</text:a>", $text);
# [[article|anchor]]
+
# [[article]]
$text = preg_replace("/\\[\\[([^\\|\\]]+)\\|([^\\]]*)]]/", "<text:a xlink:type=\"simple\" xlink:href=\"$script/\\1\">\\2</text:a>", $text);
+
$text = preg_replace("/\\[\\[([^\\]]+)]]/", "<text:a xlink:type=\"simple\" xlink:href=\"$script/\\1\">\\1</text:a>", $text);
+
# [url anchor]
# [[article]]
+
$text = preg_replace("/\\[([^ \\]]+) +([^\\]]*)]/", "<text:a xlink:type=\"simple\" xlink:href=\"\\1\">\\2</text:a>", $text);
$text = preg_replace("/\\[\\[([^\\]]+)]]/", "<text:a xlink:type=\"simple\" xlink:href=\"$script/\\1\">\\1</text:a>", $text);
+
# [url]
+
$text = preg_replace("/\\[([^\\]]+)]/", "<text:a xlink:type=\"simple\" xlink:href=\"\\1\">\\1</text:a>", $text);
# [url anchor]
+
# <hr> (pre)
$text = preg_replace("/\\[([^ \\]]+) +([^\\]]*)]/", "<text:a xlink:type=\"simple\" xlink:href=\"\\1\">\\2</text:a>", $text);
+
$text = str_replace('&lt;hr&gt;', $token4, $text);
+
}
# [url]
 
$text = preg_replace("/\\[([^\\]]+)]/", "<text:a xlink:type=\"simple\" xlink:href=\"\\1\">\\1</text:a>", $text);
 
 
 
$xml .= "<text:p text:style-name=\"Text body\">$text</text:p>\n";
 
$xml .= "<text:p text:style-name=\"Text body\">$text</text:p>\n";
 
}
 
}
 
 
# ;text (post)
 
# ;text (post)
$xml = preg_replace("/(?<=>)$hToken(.+)(?=<)/", "<text:span text:style-name=\"T2\">\\1</text:span>", $xml);
+
$xml = preg_replace("/(?<=>)$token2(.+)(?=<)/", "<text:span text:style-name=\"T2\">\\1</text:span>", $xml);
 
 
 
# *text (post)
 
# *text (post)
$xml= preg_replace("/$bToken\\*+(.*?)$bToken/m", "<text:list-item><text:p text:style-name=\"P12\">\\1</text:p></text:list-item>", $xml);
+
$xml= preg_replace("/$token1\\*+(.*?)$token1/m", "<text:list-item><text:p text:style-name=\"P12\">\\1</text:p></text:list-item>", $xml);
 
$xml= preg_replace("/(?<!<\\/text:list-item>)(<text:list-item>)/", "</text:p>\n<text:unordered-list text:style-name=\"L1\">\\1", $xml);
 
$xml= preg_replace("/(?<!<\\/text:list-item>)(<text:list-item>)/", "</text:p>\n<text:unordered-list text:style-name=\"L1\">\\1", $xml);
 
$xml= preg_replace("/(<\\/text:list-item>)(?!<text:list-item>)/", "\\1</text:unordered-list>\n<text:p text:style-name=\"Text body\">", $xml);
 
$xml= preg_replace("/(<\\/text:list-item>)(?!<text:list-item>)/", "\\1</text:unordered-list>\n<text:p text:style-name=\"Text body\">", $xml);
 
+
# no-wiki (post)
# <hr>
+
$xml = str_replace("NW$token3",'', $xml);
$xml = str_replace('&lt;hr&gt;</text:p>', "</text:p>\n<text:p text:style-name=\"P13\" />\n", $xml);
+
# <hr> (post)
$xml = str_replace('&lt;hr&gt;', "</text:p>\n<text:p text:style-name=\"P13\" />\n<text:p text:style-name=\"Text body\">", $xml);
+
$xml = str_replace("$token4</text:p>", "</text:p>\n<text:p text:style-name=\"P13\" />\n", $xml);
 
+
$xml = str_replace($token4, "</text:p>\n<text:p text:style-name=\"P13\" />\n<text:p text:style-name=\"Text body\">", $xml);
# &nbsp;
+
# spaces
 
$xml = str_replace('&amp;nbsp;', '&nbsp;', $xml);
 
$xml = str_replace('&amp;nbsp;', '&nbsp;', $xml);
 
+
$xml = str_replace('&nbsp; ', '&nbsp;&nbsp;', $xml);
 +
$xml = preg_replace("/((&nbsp;)+)/e",'"<text:s text:c=\"".(strlen("$1")/6)."\"/>"',$xml);
 +
# tables (just remove for now)
 +
$xml = preg_replace("/&lt;\\/?(center|table|tr|th|td).*?&gt;/i",'',$xml);
 +
# convert xwcode and syntax colours
 +
$xml = preg_replace_callback("/..$token3(.+?)(?=<\\/text:p>)/s", 'replace_code', $xml);
 
return $xml;
 
return $xml;
 
}
 
}
  
 
$htdocs = $GLOBALS['_SERVER']['DOCUMENT_ROOT'];
 
$htdocs = $GLOBALS['_SERVER']['DOCUMENT_ROOT'];
 +
$GLOBALS['oo-styles'] = array();
 +
$GLOBALS['oo-style-indexes'] = array();
 
$error = false;
 
$error = false;
  
Line 80: Line 114:
 
$styles = str_replace('My-Header', "$script/$title", $styles);
 
$styles = str_replace('My-Header', "$script/$title", $styles);
  
# Convert problem characters (should make them work instead!)
+
# Preserve problem characters so they don't get converted to entities
$article = str_replace('’',"'",$article);
+
$cToken = '::--::';
$article = str_replace('…','...',$article);
+
$specialChrs = array('…', '–', '‘', '’', '“', '”');
$article = str_replace('–','-',$article);
+
foreach ($specialChrs as $i => $c) $article = str_replace($c, "$i$cToken", $article);
$article = preg_replace('/“|”/','"',$article);
 
  
 
# Build main document body
 
# Build main document body
Line 105: Line 138:
 
# Insert the constructed body into the xml template
 
# Insert the constructed body into the xml template
 
$content = str_replace('</office:body>', "\n$body\n</office:body>", $content);
 
$content = str_replace('</office:body>', "\n$body\n</office:body>", $content);
 +
 +
# Recover the special characters
 +
foreach ($specialChrs as $i => $c) $content = str_replace("$i$cToken", $c, $content);
 +
 +
# Insert OO'd CSS styles
 +
$ooStyles = '';
 +
foreach ($GLOBALS['oo-styles'] as $style => $i) $ooStyles .= "<style:style style:name=\"T$i\" style:family=\"text\">\n$style\n</style:style>\n";
 +
$content = str_replace('</office:automatic-styles>', "$ooStyles\n</office:automatic-styles>", $content);
  
 
# Insert TOC msg into template
 
# Insert TOC msg into template
Line 141: Line 182:
 
@unlink($sxw);
 
@unlink($sxw);
 
@rmdir($tmp);
 
@rmdir($tmp);
?>
+
?></pre>

Revision as of 03:19, 20 August 2005

<?

# Export article to OO format
# - only does document.php transform and its own wiki markup

# Returns an OO-span element from a CSS-style-span
#
# Callback for preg_replace_callback(): $matches[1] holds the contents of a
# style="..." attribute (e.g. "color: red; font-weight: bold").
#
# The recognised declarations are turned into a <style:properties .../> element.
# Each distinct element is registered once in $GLOBALS['oo-styles'] (element => number)
# and $GLOBALS['oo-style-indexes'] (number => element); the returned opening
# <text:span> tag refers to that number as style "T<number>".
function replace_span($matches) {
	# The registry is normally initialised by the export script; guard so that
	# count() below never receives an unset value
	if (!isset($GLOBALS['oo-styles'])) $GLOBALS['oo-styles'] = array();

	$oo = '';
	# Strip whitespace around the ':' and ';' separators
	$css = preg_replace("/\\s*([:;])\\s*/m", '$1', $matches[1]);
	foreach (explode(';', $css) as $style) {
		# A trailing ';' yields an empty declaration; anything without a ':' has no value to read
		if (strpos($style, ':') === false) continue;
		list($att, $val) = explode(':', $style, 2);
		$att = trim($att);
		$val = trim($val);
		if ($att === 'color') $oo .= " fo:color=\"$val\"";
		# NOTE(review): any font-weight / font-style declaration is mapped to bold / italic
		# regardless of its value (so "font-weight: normal" also becomes bold) - confirm intended
		if ($att === 'font-weight') $oo .= ' fo:font-weight="bold" style:font-weight-asian="bold" style:font-weight-complex="bold"';
		if ($att === 'font-style') $oo .= ' fo:font-style="italic" style:font-style-asian="italic" style:font-style-complex="italic"';
		}
	$oo = "<style:properties $oo/>";

	if (isset($GLOBALS['oo-styles'][$oo])) $tN = $GLOBALS['oo-styles'][$oo];
	else {
		# Generated styles are numbered from 5 upwards
		# (presumably the lower T-numbers belong to the document template - TODO confirm)
		$tN = $GLOBALS['oo-styles'][$oo] = 5 + count($GLOBALS['oo-styles']);
		$GLOBALS['oo-style-indexes'][$tN] = $oo;
		}
	return "<text:span text:style-name=\"T$tN\">";
	}

# Converts a captured code block into a run of OO "Code" paragraphs
#
# Callback for preg_replace_callback(): $matches[1] is the block's content, in which
# lines are separated by <br /> tags and colouring is expressed as
# <span style="..."> ... </span> pairs.
#
# Each line becomes its own <text:p text:style-name="Code"> paragraph and every span is
# converted to a <text:span> via replace_span(). The result starts by closing the
# paragraph the block was found in and ends by opening a new "Text body" paragraph,
# so it can be substituted directly inside an existing paragraph.
function replace_code($matches) {
	$text = '';
	foreach (preg_split('/<br *\\/>/', $matches[1]) as $line)
		$text .= "<text:p text:style-name=\"Code\">$line</text:p>\n";
	# Opening span tags become OO spans carrying a generated style name
	$text = preg_replace_callback('/<span style="(.*?)">/', 'replace_span', $text);
	$text = preg_replace('/<\\/span>/', '</text:span>', $text);
	return "</text:p>\n$text\n<text:p text:style-name=\"Text body\">\n";
	}

# Main wiki->oo parser
#
# Converts a piece of wiki markup into a string of OO XML paragraphs.
#
# Works in three phases:
#  1. (pre)  constructs that must survive entity-encoding are tagged with placeholder tokens
#  2. the text is split at blank lines; each chunk is entity-encoded, has its inline markup
#     (bold, italic, links) converted and is wrapped in a "Text body" paragraph
#  3. (post) the placeholder tokens are replaced by the final OO elements
#
# Reads $GLOBALS['xwScript'] as the base URL for internal links and relies on
# replace_code() for the final conversion of verbatim blocks.
#
# NOTE(review): htmlentities() emits HTML named entities, which are not defined in
# plain XML - confirm the target document tolerates them.
function ooParse($text) {
	$script = $GLOBALS['xwScript'];

	# Placeholder tokens
	$token1 = ':-:token1:-:';  # wraps a bullet-list item
	$token2 = ':-:token2:-:';  # prefixes a ";text" line
	$token3 = ':-:token3:-:';  # marks a verbatim chunk (PR = pre, NW = nowiki, XW = xwcode)
	$token4 = ':-:token4:-:';  # stands for a horizontal rule

	# ;text (pre)
	$text = preg_replace("/\\n;(.+?)\\n/m", "\n\n$token2\\1\n\n", $text);

	# *text (pre)
	$text = preg_replace("/\\n(\\*+) *(.+?)$/m", "$token1\\1\\2$token1", $text);

	# <pre> (pre)
	# - a callback replaces the former /e modifier, which evaluated the matched text as
	#   PHP code and is no longer available in PHP 7+
	$text = preg_replace_callback(
		"/<pre>(.+?)<\\/pre>/s",
		function ($m) use ($token3) {
			return "\n\nPR$token3".htmlentities($m[1], ENT_COMPAT)."\n\n";
			},
		$text
		);

	# no-wiki (pre)
	$text = preg_replace("/<[n]owiki>(.+?)<\\/nowiki>/s", "\n\nNW$token3$1\n\n", $text);

	# xwcode (pre)
	$text = preg_replace('/<!-- xwcode-start --><div class="xwcode">(.*?)<.div><!-- xwcode-end -->/s', "\n\nXW$token3$1\n\n", $text);

	$xml = '';
	foreach (explode("\n\n", trim($text)) as $para) {

		# Verbatim chunks are passed through untouched
		if (strpos($para, $token3) === false) {

			# ENT_COMPAT leaves single quotes alone so the '' and ''' markup below still matches
			$para = htmlentities($para, ENT_COMPAT);

			# '''text'''
			$para = preg_replace("/'''(.+?)'''/s", "<text:span text:style-name=\"T2\">\\1</text:span>", $para);

			# ''text''
			$para = preg_replace("/''(.+?)''/s", "<text:span text:style-name=\"T1\">\\1</text:span>", $para);

			# [[Wikipedia:article|anchor]]
			$para = preg_replace("/\\[\\[Wikipedia:([^\\|\\]]+)\\|([^\\]]*)]]/", "<text:a xlink:type=\"simple\" xlink:href=\"http://en.wikipedia.org/wiki/\\1\">\\2</text:a>", $para);

			# [[article|anchor]]
			$para = preg_replace("/\\[\\[([^\\|\\]]+)\\|([^\\]]*)]]/", "<text:a xlink:type=\"simple\" xlink:href=\"$script/\\1\">\\2</text:a>", $para);

			# [[article]]
			$para = preg_replace("/\\[\\[([^\\]]+)]]/", "<text:a xlink:type=\"simple\" xlink:href=\"$script/\\1\">\\1</text:a>", $para);

			# [url anchor]
			$para = preg_replace("/\\[([^ \\]]+) +([^\\]]*)]/", "<text:a xlink:type=\"simple\" xlink:href=\"\\1\">\\2</text:a>", $para);

			# [url]
			$para = preg_replace("/\\[([^\\]]+)]/", "<text:a xlink:type=\"simple\" xlink:href=\"\\1\">\\1</text:a>", $para);

			# <hr> (pre) - already entity-encoded at this point
			$para = str_replace('&lt;hr&gt;', $token4, $para);
			}

		$xml .= "<text:p text:style-name=\"Text body\">$para</text:p>\n";
		}

	# ;text (post)
	$xml = preg_replace("/(?<=>)$token2(.+)(?=<)/", "<text:span text:style-name=\"T2\">\\1</text:span>", $xml);

	# *text (post)
	# - each item becomes a list-item, then every run of items is wrapped in a list,
	#   closing the paragraph before it and opening a new one after it
	$xml = preg_replace("/$token1\\*+(.*?)$token1/m", "<text:list-item><text:p text:style-name=\"P12\">\\1</text:p></text:list-item>", $xml);
	$xml = preg_replace("/(?<!<\\/text:list-item>)(<text:list-item>)/", "</text:p>\n<text:unordered-list text:style-name=\"L1\">\\1", $xml);
	$xml = preg_replace("/(<\\/text:list-item>)(?!<text:list-item>)/", "\\1</text:unordered-list>\n<text:p text:style-name=\"Text body\">", $xml);

	# no-wiki (post)
	$xml = str_replace("NW$token3", '', $xml);

	# <hr> (post)
	$xml = str_replace("$token4</text:p>", "</text:p>\n<text:p text:style-name=\"P13\" />\n", $xml);
	$xml = str_replace($token4, "</text:p>\n<text:p text:style-name=\"P13\" />\n<text:p text:style-name=\"Text body\">", $xml);

	# spaces
	# - undo the double-encoding of &nbsp;, absorb one following plain space into the run,
	#   then collapse each run into a single <text:s/> carrying the count
	$xml = str_replace('&amp;nbsp;', '&nbsp;', $xml);
	$xml = str_replace('&nbsp; ', '&nbsp;&nbsp;', $xml);
	$xml = preg_replace_callback(
		"/((&nbsp;)+)/",
		function ($m) {
			# '&nbsp;' is 6 characters long
			return '<text:s text:c="'.(strlen($m[1]) / 6).'"/>';
			},
		$xml
		);

	# tables (just remove for now)
	$xml = preg_replace("/&lt;\\/?(center|table|tr|th|td).*?&gt;/i", '', $xml);

	# convert xwcode and syntax colours
	# - the two leading dots consume the PR/NW/XW prefix in front of the token
	$xml = preg_replace_callback("/..$token3(.+?)(?=<\\/text:p>)/s", 'replace_code', $xml);

	return $xml;
	}

# Export script body
# - builds content.xml and styles.xml from template articles, zips them into an .sxw
#   archive in a temporary folder and streams that archive back to the browser
# - $title, $article and $script are not set in this file
#   (presumably provided by the including script - TODO confirm)
$htdocs = $GLOBALS['_SERVER']['DOCUMENT_ROOT'];
$GLOBALS['oo-styles'] = array();
$GLOBALS['oo-style-indexes'] = array();
$error = false;

# Create a unique working folder to build zip in
$tmp = "$htdocs/wiki/tmp/".uniqid('export');
if (!mkdir($tmp)) $error = "Could not create working folder \"$tmp\"";
$sxw = "$tmp/sxw";
$file1 = "$tmp/content.xml";
$file2 = "$tmp/styles.xml";

# Zip compression library - www.phpconcept.net/pclzip/
define('PCLZIP_TEMPORARY_DIR', "$tmp/");
require_once("$htdocs/wiki/xmlwiki/lib/pclzip/pclzip.lib.php");

# Get OO content and styles template articles
$content = xwArticleContent('oo-doc-content.xml');
$styles = xwArticleContent('oo-doc-styles.xml');

# Insert our document info into existing tokens
$content = str_replace('Title', str_replace('_',' ',$title), $content);
$content = str_replace('Version', 'Exported by '.$GLOBALS['xwUserName'].' on '.date('d M Y').' at '.date('H:i:s'), $content);
$styles = str_replace('My-Header', "$script/$title", $styles);

# Preserve problem characters so they don't get converted to entities
# - each one is swapped for "<index>::--::" here and swapped back once the body is built
$cToken = '::--::';
$specialChrs = array('…', '–', '‘', '’', '“', '”');
foreach ($specialChrs as $i => $c) $article = str_replace($c, "$i$cToken", $article);

# Build main document body
# - the match splits the article into sections at headings, extracting heading, level & text
$body = ''; $last = 0; $toclist = array();
if (preg_match_all('/<h1><tmp num="(#*)" title="(.*?)" anchor="(.*?)".><.h1>/s', $article, $sections, PREG_SET_ORDER | PREG_OFFSET_CAPTURE)) {
	foreach ($sections as $m) {
		# Heading level is the number of '#' characters in the num attribute
		$level = strlen($m[1][0]);
		$anchor = htmlentities($m[3][0], ENT_COMPAT);
		# Parse whatever text lies between the previous heading and this one
		if ($i = $m[0][1] - $last) $body .= ooParse(substr($article, $last, $i));
		$style = $level > 1 ? "Heading $level" : 'P11';
		if ($level && $level < 3) $toclist[] = array('P'.($level+7), $anchor);
		if ($anchor) $body .= "<text:h text:style-name=\"$style\" text:level=\"$level\">$anchor</text:h>\n";
		$last = $m[0][1] + strlen($m[0][0]);
		}
	}
# Handle last section (or all if there were no headings)
if ($i = strlen($article) - $last) $body .= ooParse(substr($article, $last, $i));

# Insert the constructed body into the xml template
$content = str_replace('</office:body>', "\n$body\n</office:body>", $content);

# Recover the special characters
foreach ($specialChrs as $i => $c) $content = str_replace("$i$cToken", $c, $content);

# Insert OO'd CSS styles
$ooStyles = '';
foreach ($GLOBALS['oo-styles'] as $style => $i) $ooStyles .= "<style:style style:name=\"T$i\" style:family=\"text\">\n$style\n</style:style>\n";
$content = str_replace('</office:automatic-styles>', "$ooStyles\n</office:automatic-styles>", $content);

# Insert TOC msg into template
$toc = "<text:p text:style-name=\"P8\">Contents is out of date, please update fields from tools menu\n<text:tab-stop/>???</text:p>\n";
$content = str_replace('</text:index-body>', "$toc\n</text:index-body>", $content);

# Dump the content & styles templates to files in the working folder
# - a failed open or write is reported instead of being passed on to the zip step
foreach (array($file1 => $content, $file2 => $styles) as $file => $data) {
	if ($error) break;
	$fh = fopen($file, 'w+');
	if ($fh === false || fwrite($fh, $data) === false) $error = "Could not write \"$file\"";
	if ($fh !== false) fclose($fh);
	}

# Add the files to the sxw archive...
if (!$error) {
	$zip = new PclZip($sxw);
	if ($zip->create( array($file1, $file2), PCLZIP_OPT_REMOVE_ALL_PATH )) {
		# ...and send as raw data back to browser as title.sxw
		# - quotes and line breaks are dropped so the title cannot break out of the quoted filename
		$fileName = str_replace(array('"', "\r", "\n"), '', $title);
		ob_end_clean();
		header('Content-type: application/zip');
		header("Content-Disposition: attachment; filename=\"$fileName.sxw\"");
		@readfile($sxw);
		# Clean up and die
		@unlink($file1);
		@unlink($file2);
		@unlink($sxw);
		@rmdir($tmp);
		die;
		}
	else $error = $zip->errorInfo(true);
	}

# Error, clean up files
xwMessage($error,'red');
@unlink($file1);
@unlink($file2);
@unlink($sxw);
@rmdir($tmp);
?>