Difference between revisions of "Oo-regexp-export.php"
Line 2: | Line 2: | ||
# Export article to OO format | # Export article to OO format | ||
# - only does document.php transform and its own wiki markup | # - only does document.php transform and its own wiki markup | ||
+ | |||
+ | # Returns an OO-span element from a CSS-style-span | ||
+ | function replace_span($matches) { | ||
+ | $oo = ''; | ||
+ | $css = preg_replace("/\\s*([:;])\\s*/m",'$1',$matches[1]); | ||
+ | foreach (explode(';', $css) as $style) { | ||
+ | list($att, $val) = explode(':', $style); | ||
+ | if ($att == 'color') $oo .= " fo:color=\"$val\""; | ||
+ | if ($att == 'font-weight') $oo .= ' fo:font-weight="bold" style:font-weight-asian="bold" style:font-weight-complex="bold"'; | ||
+ | if ($att == 'font-style') $oo .= ' fo:font-style="italic" style:font-style-asian="italic" style:font-style-complex="italic"'; | ||
+ | } | ||
+ | $oo = "<style:properties $oo/>"; | ||
+ | if (isset($GLOBALS['oo-styles'][$oo])) $tN = $GLOBALS['oo-styles'][$oo]; else { | ||
+ | $tN = $GLOBALS['oo-styles'][$oo] = 5+count($GLOBALS['oo-styles']); | ||
+ | $GLOBALS['oo-style-indexes'][$tN] = $oo; | ||
+ | } | ||
+ | return "<text:span text:style-name=\"T$tN\">"; | ||
+ | } | ||
+ | |||
+ | function replace_code($matches) { | ||
+ | $text = ''; | ||
+ | foreach (preg_split('/<br *\\/>/', $matches[1]) as $line) | ||
+ | $text .= "<text:p text:style-name=\"Code\">$line</text:p>\n"; | ||
+ | $text = preg_replace_callback('/<span style="(.*?)">/', 'replace_span', $text); | ||
+ | $text = preg_replace('/<\\/span>/', '</text:span>', $text); | ||
+ | return "</text:p>\n$text\n<text:p text:style-name=\"Text body\">\n"; | ||
+ | } | ||
# Main wiki->oo parser | # Main wiki->oo parser | ||
function ooParse($text) { | function ooParse($text) { | ||
$script = $GLOBALS['xwScript']; | $script = $GLOBALS['xwScript']; | ||
− | $ | + | for ($i = 0; $i < 10; $i++) { |
− | + | $token = "token$i"; | |
− | + | $$token = ":-:$token:-:"; | |
+ | } | ||
# ;text (pre) | # ;text (pre) | ||
− | $text = preg_replace("/\\n;(.+?)\\n/m", "\n\n$ | + | $text = preg_replace("/\\n;(.+?)\\n/m", "\n\n$token2\\1\n\n", $text); |
− | |||
# *text (pre) | # *text (pre) | ||
− | $text = preg_replace("/\\n(\\*+) *(.+?)$/m", "$ | + | $text = preg_replace("/\\n(\\*+) *(.+?)$/m", "$token1\\1\\2$token1", $text); |
− | + | # <pre> (pre) | |
+ | $text = preg_replace("/<pre>(.+?)<\\/pre>/es", '"\n\nPR$token3".htmlentities("$1")."\n\n"', $text); | ||
+ | # no-wiki (pre) | ||
+ | $text = preg_replace("/<[n]owiki>(.+?)<\\/nowiki>/s", "\n\nNW$token3$1\n\n", $text); | ||
+ | # xwcode (pre) | ||
+ | $text = preg_replace('/<!-- xwcode-start --><div class="xwcode">(.*?)<.div><!-- xwcode-end -->/s', "\n\nXW$token3$1\n\n", $text); | ||
$xml = ''; | $xml = ''; | ||
− | foreach (explode("\n\n", trim | + | foreach (explode("\n\n", trim($text)) as $text) { |
− | + | if (!ereg($token3, $text)) { | |
− | + | $text = htmlentities($text); | |
− | + | # '''text''' | |
− | + | $text = preg_replace("/'''(.+?)'''/s", "<text:span text:style-name=\"T2\">\\1</text:span>", $text); | |
− | + | # ''text'' | |
− | + | $text = preg_replace("/''(.+?)''/s", "<text:span text:style-name=\"T1\">\\1</text:span>", $text); | |
− | + | # [[Wikipedia:article|anchor]] | |
− | + | $text = preg_replace("/\\[\\[Wikipedia:([^\\|\\]]+)\\|([^\\]]*)]]/", "<text:a xlink:type=\"simple\" xlink:href=\"http://en.wikipedia.org/wiki/\\1\">\\2</text:a>", $text); | |
− | + | # [[article|anchor]] | |
− | + | $text = preg_replace("/\\[\\[([^\\|\\]]+)\\|([^\\]]*)]]/", "<text:a xlink:type=\"simple\" xlink:href=\"$script/\\1\">\\2</text:a>", $text); | |
− | + | # [[article]] | |
− | + | $text = preg_replace("/\\[\\[([^\\]]+)]]/", "<text:a xlink:type=\"simple\" xlink:href=\"$script/\\1\">\\1</text:a>", $text); | |
− | + | # [url anchor] | |
− | + | $text = preg_replace("/\\[([^ \\]]+) +([^\\]]*)]/", "<text:a xlink:type=\"simple\" xlink:href=\"\\1\">\\2</text:a>", $text); | |
− | + | # [url] | |
− | + | $text = preg_replace("/\\[([^\\]]+)]/", "<text:a xlink:type=\"simple\" xlink:href=\"\\1\">\\1</text:a>", $text); | |
− | + | # <hr> (pre) | |
− | + | $text = str_replace('<hr>', $token4, $text); | |
− | + | } | |
− | |||
− | |||
− | |||
$xml .= "<text:p text:style-name=\"Text body\">$text</text:p>\n"; | $xml .= "<text:p text:style-name=\"Text body\">$text</text:p>\n"; | ||
} | } | ||
− | |||
# ;text (post) | # ;text (post) | ||
− | $xml = preg_replace("/(?<=>)$ | + | $xml = preg_replace("/(?<=>)$token2(.+)(?=<)/", "<text:span text:style-name=\"T2\">\\1</text:span>", $xml); |
− | |||
# *text (post) | # *text (post) | ||
− | $xml= preg_replace("/$ | + | $xml= preg_replace("/$token1\\*+(.*?)$token1/m", "<text:list-item><text:p text:style-name=\"P12\">\\1</text:p></text:list-item>", $xml); |
$xml= preg_replace("/(?<!<\\/text:list-item>)(<text:list-item>)/", "</text:p>\n<text:unordered-list text:style-name=\"L1\">\\1", $xml); | $xml= preg_replace("/(?<!<\\/text:list-item>)(<text:list-item>)/", "</text:p>\n<text:unordered-list text:style-name=\"L1\">\\1", $xml); | ||
$xml= preg_replace("/(<\\/text:list-item>)(?!<text:list-item>)/", "\\1</text:unordered-list>\n<text:p text:style-name=\"Text body\">", $xml); | $xml= preg_replace("/(<\\/text:list-item>)(?!<text:list-item>)/", "\\1</text:unordered-list>\n<text:p text:style-name=\"Text body\">", $xml); | ||
− | + | # no-wiki (post) | |
− | # <hr> | + | $xml = str_replace("NW$token3",'', $xml); |
− | $xml = str_replace( | + | # <hr> (post) |
− | $xml = str_replace( | + | $xml = str_replace("$token4</text:p>", "</text:p>\n<text:p text:style-name=\"P13\" />\n", $xml); |
− | + | $xml = str_replace($token4, "</text:p>\n<text:p text:style-name=\"P13\" />\n<text:p text:style-name=\"Text body\">", $xml); | |
− | # | + | # spaces |
$xml = str_replace('&nbsp;', ' ', $xml); | $xml = str_replace('&nbsp;', ' ', $xml); | ||
− | + | $xml = str_replace(' ', ' ', $xml); | |
+ | $xml = preg_replace("/(( )+)/e",'"<text:s text:c=\"".(strlen("$1")/6)."\"/>"',$xml); | ||
+ | # tables (just remove for now) | ||
+ | $xml = preg_replace("/<\\/?(center|table|tr|th|td).*?>/i",'',$xml); | ||
+ | # convert xwcode and syntax colours | ||
+ | $xml = preg_replace_callback("/..$token3(.+?)(?=<\\/text:p>)/s", 'replace_code', $xml); | ||
return $xml; | return $xml; | ||
} | } | ||
$htdocs = $GLOBALS['_SERVER']['DOCUMENT_ROOT']; | $htdocs = $GLOBALS['_SERVER']['DOCUMENT_ROOT']; | ||
+ | $GLOBALS['oo-styles'] = array(); | ||
+ | $GLOBALS['oo-style-indexes'] = array(); | ||
$error = false; | $error = false; | ||
Line 80: | Line 114: | ||
$styles = str_replace('My-Header', "$script/$title", $styles); | $styles = str_replace('My-Header', "$script/$title", $styles); | ||
− | # | + | # Preserve problem characters so they don't get converted to entities |
− | $ | + | $cToken = '::--::'; |
− | $ | + | $specialChrs = array('…', '–', '‘', '’', '“', 'â€?'); |
− | + | foreach ($specialChrs as $i => $c) $article = str_replace($c, "$i$cToken", $article); | |
− | $article = | ||
# Build main document body | # Build main document body | ||
Line 105: | Line 138: | ||
# Insert the constructed body into the xml template | # Insert the constructed body into the xml template | ||
$content = str_replace('</office:body>', "\n$body\n</office:body>", $content); | $content = str_replace('</office:body>', "\n$body\n</office:body>", $content); | ||
+ | |||
+ | # Recover the special characters | ||
+ | foreach ($specialChrs as $i => $c) $content = str_replace("$i$cToken", $c, $content); | ||
+ | |||
+ | # Insert OO'd CSS styles | ||
+ | $ooStyles = ''; | ||
+ | foreach ($GLOBALS['oo-styles'] as $style => $i) $ooStyles .= "<style:style style:name=\"T$i\" style:family=\"text\">\n$style\n</style:style>\n"; | ||
+ | $content = str_replace('</office:automatic-styles>', "$ooStyles\n</office:automatic-styles>", $content); | ||
# Insert TOC msg into template | # Insert TOC msg into template | ||
Line 141: | Line 182: | ||
@unlink($sxw); | @unlink($sxw); | ||
@rmdir($tmp); | @rmdir($tmp); | ||
− | ?> | + | ?></pre> |
Revision as of 03:19, 20 August 2005
<?
# Export article to OO format
# - only does document.php transform and its own wiki markup
# Returns an OO-span element from a CSS-style-span
# - $matches is a preg match array; $matches[1] holds the content of the span's style attribute
# - every distinct set of OO text properties is registered once in $GLOBALS['oo-styles']
#   (properties element => style number) and mirrored in $GLOBALS['oo-style-indexes']
#   (style number => properties element); numbering starts at 5
# - returns the opening <text:span> tag referring to that style as "T<number>"
function replace_span($matches) {
    # Create the registries on first use so count() below never sees an unset value
    if (!isset($GLOBALS['oo-styles']) || !is_array($GLOBALS['oo-styles'])) $GLOBALS['oo-styles'] = array();
    if (!isset($GLOBALS['oo-style-indexes']) || !is_array($GLOBALS['oo-style-indexes'])) $GLOBALS['oo-style-indexes'] = array();

    $oo = '';

    # Strip whitespace around the CSS separators so the declarations split cleanly
    $css = preg_replace("/\\s*([:;])\\s*/m", '$1', $matches[1]);

    foreach (explode(';', $css) as $style) {
        # A trailing ";" leaves an empty piece with no ":" in it - nothing to translate
        if (strpos($style, ':') === false) continue;

        # Limit of 2 keeps any further ":" inside the value
        list($att, $val) = explode(':', $style, 2);
        $att = strtolower(trim($att));
        $val = trim($val);
        $lcVal = strtolower($val);

        if ($att == 'color') $oo .= " fo:color=\"$val\"";

        # Only bold values produce bold output ("normal", "lighter" and light numeric weights do not)
        if ($att == 'font-weight' && ($lcVal == 'bold' || $lcVal == 'bolder' || (is_numeric($lcVal) && $lcVal >= 600)))
            $oo .= ' fo:font-weight="bold" style:font-weight-asian="bold" style:font-weight-complex="bold"';

        # Only slanted values produce italic output
        if ($att == 'font-style' && ($lcVal == 'italic' || $lcVal == 'oblique'))
            $oo .= ' fo:font-style="italic" style:font-style-asian="italic" style:font-style-complex="italic"';
    }

    $oo = "<style:properties $oo/>";

    # Reuse the number of an identical style, otherwise register the next free one
    if (isset($GLOBALS['oo-styles'][$oo])) $tN = $GLOBALS['oo-styles'][$oo];
    else {
        $tN = $GLOBALS['oo-styles'][$oo] = 5 + count($GLOBALS['oo-styles']);
        $GLOBALS['oo-style-indexes'][$tN] = $oo;
    }

    return "<text:span text:style-name=\"T$tN\">";
}
# Converts the content of a captured code section into OO "Code" paragraphs
# - $matches is a preg match array; $matches[1] holds the HTML of the code section
# - every <br />-separated line becomes its own <text:p text:style-name="Code"> paragraph
# - CSS <span style="..."> elements are translated to OO spans by replace_span
# - the result starts with </text:p> and ends with an opening "Text body" paragraph;
#   presumably this is because the match sits inside a paragraph built by ooParse - verify against caller
function replace_code($matches) {
    $text = '';

    # One "Code" paragraph per line of the section
    foreach (preg_split('/<br *\\/>/', $matches[1]) as $line)
        $text .= "<text:p text:style-name=\"Code\">$line</text:p>\n";

    # Syntax colouring: opening spans go through replace_span, closing spans map one-to-one
    $text = preg_replace_callback('/<span style="(.*?)">/', 'replace_span', $text);
    $text = preg_replace('/<\\/span>/', '</text:span>', $text);

    return "</text:p>\n$text\n<text:p text:style-name=\"Text body\">\n";
}
# Main wiki->oo parser
# - $text is a section of article text (wiki markup mixed with some HTML)
# - returns the section as a string of OO XML paragraphs and lists
# - works in three stages: constructs that must survive entity-escaping are first marked
#   with placeholder tokens (pre), the text is then split on blank lines into paragraphs
#   and its inline markup converted, and finally the tokens are expanded to OO XML (post)
function ooParse($text) {
    $script = $GLOBALS['xwScript'];

    # Placeholder tokens
    $token1 = ':-:token1:-:';   # delimits a *list item
    $token2 = ':-:token2:-:';   # marks a ;text line
    $token3 = ':-:token3:-:';   # marks a verbatim paragraph (pre, nowiki, xwcode)
    $token4 = ':-:token4:-:';   # marks a horizontal rule

    # ;text (pre)
    $text = preg_replace("/\\n;(.+?)\\n/m", "\n\n$token2\\1\n\n", $text);

    # *text (pre)
    $text = preg_replace("/\\n(\\*+) *(.+?)$/m", "$token1\\1\\2$token1", $text);

    # <pre> (pre)
    # - uses a callback instead of the /e modifier, which was removed in PHP 7
    $text = preg_replace_callback("/<pre>(.+?)<\\/pre>/s", function ($m) use ($token3) {
        return "\n\nPR$token3" . htmlentities($m[1]) . "\n\n";
    }, $text);

    # no-wiki (pre)
    $text = preg_replace("/<[n]owiki>(.+?)<\\/nowiki>/s", "\n\nNW$token3$1\n\n", $text);

    # xwcode (pre)
    $text = preg_replace('/<!-- xwcode-start --><div class="xwcode">(.*?)<.div><!-- xwcode-end -->/s', "\n\nXW$token3$1\n\n", $text);

    $xml = '';
    foreach (explode("\n\n", trim($text)) as $para) {

        # Verbatim paragraphs (marked with token3) skip escaping and inline markup
        # - strpos replaces ereg(), which was removed in PHP 7; the token has no regex meta characters
        if (strpos($para, $token3) === false) {
            $para = htmlentities($para);

            # '''text'''
            $para = preg_replace("/'''(.+?)'''/s", "<text:span text:style-name=\"T2\">\\1</text:span>", $para);

            # ''text''
            $para = preg_replace("/''(.+?)''/s", "<text:span text:style-name=\"T1\">\\1</text:span>", $para);

            # [[Wikipedia:article|anchor]]
            $para = preg_replace("/\\[\\[Wikipedia:([^\\|\\]]+)\\|([^\\]]*)]]/", "<text:a xlink:type=\"simple\" xlink:href=\"http://en.wikipedia.org/wiki/\\1\">\\2</text:a>", $para);

            # [[article|anchor]]
            $para = preg_replace("/\\[\\[([^\\|\\]]+)\\|([^\\]]*)]]/", "<text:a xlink:type=\"simple\" xlink:href=\"$script/\\1\">\\2</text:a>", $para);

            # [[article]]
            $para = preg_replace("/\\[\\[([^\\]]+)]]/", "<text:a xlink:type=\"simple\" xlink:href=\"$script/\\1\">\\1</text:a>", $para);

            # [url anchor]
            $para = preg_replace("/\\[([^ \\]]+) +([^\\]]*)]/", "<text:a xlink:type=\"simple\" xlink:href=\"\\1\">\\2</text:a>", $para);

            # [url]
            $para = preg_replace("/\\[([^\\]]+)]/", "<text:a xlink:type=\"simple\" xlink:href=\"\\1\">\\1</text:a>", $para);

            # <hr> (pre)
            # - htmlentities has already run on this paragraph, so the tag is present in its escaped form
            $para = str_replace(array('<hr>', '&lt;hr&gt;'), $token4, $para);
        }

        $xml .= "<text:p text:style-name=\"Text body\">$para</text:p>\n";
    }

    # ;text (post)
    $xml = preg_replace("/(?<=>)$token2(.+)(?=<)/", "<text:span text:style-name=\"T2\">\\1</text:span>", $xml);

    # *text (post)
    # - each item becomes a list-item, then runs of adjacent items are wrapped in an unordered-list
    $xml = preg_replace("/$token1\\*+(.*?)$token1/m", "<text:list-item><text:p text:style-name=\"P12\">\\1</text:p></text:list-item>", $xml);
    $xml = preg_replace("/(?<!<\\/text:list-item>)(<text:list-item>)/", "</text:p>\n<text:unordered-list text:style-name=\"L1\">\\1", $xml);
    $xml = preg_replace("/(<\\/text:list-item>)(?!<text:list-item>)/", "\\1</text:unordered-list>\n<text:p text:style-name=\"Text body\">", $xml);

    # no-wiki (post)
    $xml = str_replace("NW$token3", '', $xml);

    # <hr> (post)
    $xml = str_replace("$token4</text:p>", "</text:p>\n<text:p text:style-name=\"P13\" />\n", $xml);
    $xml = str_replace($token4, "</text:p>\n<text:p text:style-name=\"P13\" />\n<text:p text:style-name=\"Text body\">", $xml);

    # spaces
    # NOTE(review): the entity literals in these three statements are inferred from the
    # division by 6 below (strlen('&nbsp;') is 6) - confirm against the stored script
    $xml = str_replace('&amp;nbsp;', '&nbsp;', $xml);
    $xml = str_replace('  ', '&nbsp;&nbsp;', $xml);
    # - a run of N non-breaking spaces becomes a single OO space element with count N
    $xml = preg_replace_callback("/((&nbsp;)+)/", function ($m) {
        return '<text:s text:c="' . (strlen($m[1]) / 6) . '"/>';
    }, $xml);

    # tables (just remove for now)
    $xml = preg_replace("/<\\/?(center|table|tr|th|td).*?>/i", '', $xml);

    # convert xwcode and syntax colours
    # - the two leading characters are the PR/XW prefix written in front of token3 above
    $xml = preg_replace_callback("/..$token3(.+?)(?=<\\/text:p>)/s", 'replace_code', $xml);

    return $xml;
}

$htdocs = $GLOBALS['_SERVER']['DOCUMENT_ROOT'];
$GLOBALS['oo-styles'] = array();
$GLOBALS['oo-style-indexes'] = array();
$error = false;

# Create a unique working folder to build zip in
mkdir($tmp = "$htdocs/wiki/tmp/".uniqid('export'));
$sxw = "$tmp/sxw";

# Zip compression library - www.phpconcept.net/pclzip/
define('PCLZIP_TEMPORARY_DIR', "$tmp/");
require_once("$htdocs/wiki/xmlwiki/lib/pclzip/pclzip.lib.php");

# Get OO content and styles template articles
$content = xwArticleContent('oo-doc-content.xml');
$styles = xwArticleContent('oo-doc-styles.xml');

# Insert our document info into existing tokens
$content = str_replace('Title', str_replace('_',' ',$title), $content);
$content = str_replace('Version', 'Exported by '.$GLOBALS['xwUserName'].' on '.date('d M Y').' at '.date('H:i:s'), $content);
$styles = str_replace('My-Header', "$script/$title", $styles);

# Preserve problem characters so they don't get converted to entities
# - each character is swapped for "<index>::--::" here and swapped back once the document is built
# NOTE(review): the list is restored from a mis-encoded rendering (ellipsis, en dash,
# single and double curly quotes) - confirm it matches the encoding of the articles
$cToken = '::--::';
$specialChrs = array('…', '–', '‘', '’', '“', '”');
foreach ($specialChrs as $i => $c) $article = str_replace($c, "$i$cToken", $article);

# Build main document body
# - the match splits the article into sections at headings, extracting heading, level & text
$body = '';
$last = 0;
$toclist = array();
if (preg_match_all('/<h1><tmp num="(#*)" title="(.*?)" anchor="(.*?)".><.h1>/s', $article, $sections, PREG_SET_ORDER | PREG_OFFSET_CAPTURE)) {
    foreach ($sections as $m) {
        $level = strlen($m[1][0]);
        $anchor = htmlentities($m[3][0]);

        # Convert the text lying between the previous heading and this one
        if ($i = $m[0][1] - $last) $body .= ooParse(substr($article, $last, $i));

        $style = $level > 1 ? "Heading $level" : 'P11';
        if ($level && $level < 3) $toclist[] = array('P'.($level+7), $anchor);
        if ($anchor) $body .= "<text:h text:style-name=\"$style\" text:level=\"$level\">$anchor</text:h>\n";
        $last = $m[0][1] + strlen($m[0][0]);
    }
}

# Handle last section (or all if there were no headings)
if ($i = strlen($article) - $last) $body .= ooParse(substr($article, $last, $i));

# Insert the constructed body into the xml template
$content = str_replace('</office:body>', "\n$body\n</office:body>", $content);

# Recover the special characters
foreach ($specialChrs as $i => $c) $content = str_replace("$i$cToken", $c, $content);

# Insert OO'd CSS styles
$ooStyles = '';
foreach ($GLOBALS['oo-styles'] as $style => $i) $ooStyles .= "<style:style style:name=\"T$i\" style:family=\"text\">\n$style\n</style:style>\n";
$content = str_replace('</office:automatic-styles>', "$ooStyles\n</office:automatic-styles>", $content);

# Insert TOC msg into template
$toc = "<text:p text:style-name=\"P8\">Contents is out of date, please update fields from tools menu\n<text:tab-stop/>???</text:p>\n";
$content = str_replace('</text:index-body>', "$toc\n</text:index-body>", $content);

# Dump the content & styles templates to files in the working folder
$fh = fopen($file1 = "$tmp/content.xml",'w+');
fwrite($fh, $content);
fclose($fh);
$fh = fopen($file2 = "$tmp/styles.xml",'w+');
fwrite($fh, $styles);
fclose($fh);

# Add the files to the sxw archive...
$zip = new PclZip($sxw);
if ($zip->create( array($file1, $file2), PCLZIP_OPT_REMOVE_ALL_PATH )) {

    # ...and send as raw data back to browser as title.sxw
    ob_end_clean();
    header('Content-type: application/zip');
    header("Content-Disposition: attachment; filename=\"$title.sxw\"");
    @readfile($sxw);

    # Clean up and die
    @unlink($file1);
    @unlink($file2);
    @unlink($sxw);
    @rmdir($tmp);
    die;
}
else $error = $zip->errorInfo(true);

# Error, clean up files
xwMessage($error,'red');
@unlink($file1);
@unlink($file2);
@unlink($sxw);
@rmdir($tmp);
?>