| ZInventor |
15-06-2009 21:00 |
Re: Site Design
Quote:
Originally Posted by Foster
(Post 863677)
It's interesting to see that your page information is in a simpler markup (like a wiki markup) to make it easier to format the page. Did you write the parser / formatter from scratch?
Do you store the page in the database as the simplified markup?
|
The parser was written by us, in PHP, based off the wiki system (but we didn't like how that worked, so we threw it away and built our own from the ground up).
everything is stored in a MySQL database in the non-parsed format.
here's some of the current (not cleaned up) code running the parser.
PHP Code:
/**
 * Convert wiki-style markup into HTML, one source line at a time.
 *
 * Block-level markup is chosen from the start of each line (first match wins):
 *   ||a||b||  /  !!a!!b!!   table row of <td> / <th> cells; a cell may start
 *                           with "N>" (colspan) and/or "N^" (rowspan), and
 *                           leading/trailing spaces select right/center align
 *   * item  /  # item       bulleted / numbered list item (repeat to nest)
 *   !  !!  !!!              <h2>, <h3>, <h4>
 *   :text                   <blockquote>
 *   =text                   emit the line without wrapping it in <p>
 *   anything else           <p>, opened on the first line after a blank line
 *
 * Inline markup is then applied to every line: ''italic'', '''bold''',
 * ''''bold italic'''', [[http://url|label]], [[user@host|label]],
 * [[page|label]] and {{file|alt|width|height|left|right|inline}}.
 *
 * NOTE(review): markup text is copied into the HTML without escaping, so this
 * must only be fed trusted content unless the caller escapes it first.
 *
 * @param string|array $text Markup as a single string (any newline style) or
 *                           as an array of lines.
 * @return string The generated HTML, lines joined with "\n".
 *
 * Reads the global $panda['imgdir'], which is prepended to image file names.
 */
function panda_parse($text) {
    global $panda;

    // If $text is in string form, split it into an array of lines.
    if (is_string($text)) {
        $text = str_replace("\r\n", "\n", $text);
        $text = str_replace("\r", "\n", $text);
        $text = explode("\n", $text);
    }

    // Regular expressions shared by every line (built once, not per line).
    $regex_table = '/^((!!|\|\|).+?)+(!!|\|\|)$/';
    // Groups: 1 = whole cell source, 2 = delimiter, 3 = span prefix, 4 = content.
    // The span prefix is captured as a whole because a repeated capture group
    // would only keep its last iteration, losing the colspan in "2>3^".
    $regex_cell = '/((!!|\|\|)((?:\d+[>^]){0,2})(.+?))(!!|\|\|)/';
    $regex_img = '/\{\{(.+?)\|(.+?)(\|(\d+)(\|(\d+))?)?(\|left|\|right|\|inline)?\}\}/';
    $regex_just_img = '/^\{\{(.+?)\|(.+?)(\|(\d+)(\|(\d+))?)?(\|left|\|right|\|inline)?\}\}$/';

    // Parsed output lines.
    $parsed = array();
    // Name of the <p>/<blockquote> tag that is currently open, or ''.
    $this_tag = '';

    foreach ($text as $i => $line) {
        // Neighbouring lines; treated as blank beyond either end of the input.
        $prev_line = isset($text[$i - 1]) ? $text[$i - 1] : '';
        $next_line = isset($text[$i + 1]) ? $text[$i + 1] : '';

        // Dashes: "--" (with optional surrounding space) becomes an em dash,
        // a spaced hyphen becomes a spaced en dash, and a hyphen between two
        // digits becomes an en dash. Look-arounds keep every other hyphen on
        // the line untouched and handle overlapping cases such as "1-2-3".
        $line = preg_replace('/ ?-- ?/', '—', $line);
        $line = preg_replace('/(?<= )-(?= )/', '–', $line);
        $line = preg_replace('/(?<=\d)-(?=\d)/', '–', $line);

        // Tables.
        if (preg_match($regex_table, $line)) {
            if (preg_match($regex_table, $prev_line)) $beginning = "\t<tr>\n";
            else $beginning = "<table class=\"parsed\">\n\t<tr>\n";
            $end = "\t</tr>";
            if (!preg_match($regex_table, $next_line)) $end .= "\n</table>";

            while (preg_match($regex_cell, $line, $m, PREG_OFFSET_CAPTURE)) {
                $cell_src = $m[1][0];
                $cell_pos = $m[1][1];
                $spans    = $m[3][0];
                $content  = $m[4][0];

                $cell_tag = ($m[2][0] == '!!') ? 'th' : 'td';
                $align = $colspan = $rowspan = '';

                // Padding on both sides centers, padding on the left only
                // right-aligns; otherwise just trim trailing spaces.
                if (preg_match('/^ +(.+?) +$/', $content, $m2)) {
                    $content = $m2[1];
                    $align = ' class="center"';
                }
                else if (preg_match('/^ +(.+?)$/', $content, $m2)) {
                    $content = $m2[1];
                    $align = ' class="right"';
                }
                else if (preg_match('/^(.+?) *$/', $content, $m2)) $content = $m2[1];

                if (preg_match('/(\d+)>/', $spans, $m2)) $colspan = ' colspan="' . $m2[1] . '"';
                if (preg_match('/(\d+)\^/', $spans, $m2)) $rowspan = ' rowspan="' . $m2[1] . '"';

                $cell_html = "\t\t<" . $cell_tag . $align . $colspan . $rowspan . '>' . $content . '</' . $cell_tag . ">\n";
                // Replace only the matched cell: a global str_replace would
                // also rewrite later cells that merely start with this text.
                $line = substr_replace($line, $cell_html, $cell_pos, strlen($cell_src));
            }

            $line = $beginning . $line . $end;
            // Drop the row's trailing delimiter, which never opens a cell.
            $line = str_replace("\n||\t</tr>", "\n\t</tr>", $line);
            $line = str_replace("\n!!\t</tr>", "\n\t</tr>", $line);
        }
        // Bulleted and numbered lists.
        else if (preg_match('/^([*#]+)/m', $line, $m)) {
            $markers = $m[1];
            $num_of_levels = strlen($markers);
            $beginning = '';
            $end = '';
            if (preg_match('/^[*#]{0,' . $num_of_levels . '}([^*#]|$)/m', $next_line)) $end = '</li>';

            // Compare each nesting level's marker with the same column of the
            // neighbouring lines to decide which lists open or close here.
            // substr() is used so that short neighbours compare as "different"
            // without reading an out-of-range string offset.
            // NOTE(review): opening tags are emitted deepest level first,
            // exactly as before; the nesting rules themselves are unchanged.
            for ($level = $num_of_levels - 1; $level >= 0; $level--) {
                $marker = $markers[$level];
                // Nested lists also close the <li> that contains them.
                $close_item = ($level > 0) ? '</li>' : '';

                if (substr($prev_line, $level, 1) !== $marker) {
                    if ($marker == '*') $beginning .= "<ul>\n";
                    else if ($marker == '#') $beginning .= "<ol>\n";
                }
                if (substr($next_line, $level, 1) !== $marker) {
                    if ($marker == '*') $end .= "\n</ul>" . $close_item;
                    else if ($marker == '#') $end .= "\n</ol>" . $close_item;
                }
            }

            $line = $beginning . '<li>' . substr($line, $num_of_levels) . $end;
        }
        // Headings.
        else if (preg_match('/^!!!/m', $line)) $line = '<h4>' . substr($line, 3) . '</h4>';
        else if (preg_match('/^!!/m', $line)) $line = '<h3>' . substr($line, 2) . '</h3>';
        else if (preg_match('/^!/m', $line)) $line = '<h2>' . substr($line, 1) . '</h2>';
        // Blockquotes.
        else if (preg_match('/^:/m', $line)) {
            $line = '<blockquote>' . substr($line, 1);
            $this_tag = 'blockquote';
        }
        // Paragraphs: first non-blank line after a blank one, unless it is a
        // lone image or is flagged with "=".
        else if ($line != '' and $prev_line == '' and !preg_match($regex_just_img, $line) and substr($line, 0, 1) !== '=') {
            $line = '<p>' . $line;
            $this_tag = 'p';
        }
        // No-paragraph lines.
        else if (substr($line, 0, 1) === '=') $line = substr($line, 1);

        // Close paragraph or blockquote when the next line is blank.
        if ($this_tag != '' and $next_line == '') {
            $line .= '</' . $this_tag . '>';
            $this_tag = '';
        }

        // Bold italics.
        while (preg_match("/'''''?(.+?)'''''?/", $line, $m)) {
            $line = str_replace($m[0], '<b><i>' . $m[1] . '</i></b>', $line);
        }
        // Bold.
        while (preg_match("/'''(.+?)'''/", $line, $m)) {
            $line = str_replace($m[0], '<b>' . $m[1] . '</b>', $line);
        }
        // Italics.
        while (preg_match("/''(.+?)''/", $line, $m)) {
            $line = str_replace($m[0], '<i>' . $m[1] . '</i>', $line);
        }

        // External links.
        while (preg_match('/\[\[(http[^|]+?)\|([^\]]+?)\]\]/', $line, $m)) {
            $line = str_replace($m[0], '<a href="' . $m[1] . '" class="ext" target="_blank">' . $m[2] . '</a>', $line);
        }
        while (preg_match('/\[\[(http[^\]]+?)\]\]/', $line, $m)) {
            $line = str_replace($m[0], '<a href="' . $m[1] . '" class="ext" target="_blank">' . $m[1] . '</a>', $line);
        }

        // Mailto links.
        while (preg_match('/\[\[([^|]+?@[^|]+?)\|([^\]]+?)\]\]/', $line, $m)) {
            $line = str_replace($m[0], '<a href="mailto:' . $m[1] . '" class="mailto">' . $m[2] . '</a>', $line);
        }
        while (preg_match('/\[\[([^|]+?@[^|]+?)\]\]/', $line, $m)) {
            $line = str_replace($m[0], '<a href="mailto:' . $m[1] . '" class="mailto">' . $m[1] . '</a>', $line);
        }

        // Internal links.
        while (preg_match('/\[\[([a-z0-9\-_=&]+?)\|(.+?)\]\]/', $line, $m)) {
            $line = str_replace($m[0], '<a href=".?' . $m[1] . '&page=' . $m[1] . '">' . $m[2] . '</a>', $line);
        }
        while (preg_match('/\[\[([a-z0-9\-_=&]+?)\]\]/', $line, $m)) {
            $line = str_replace($m[0], '<a href=".?' . $m[1] . '&page=' . $m[1] . '">' . $m[1] . '</a>', $line);
        }

        // Images.
        while (preg_match($regex_img, $line, $m)) {
            // Start from empty attributes for every image so that dimensions
            // given for one image never carry over to the next.
            $width  = !empty($m[4]) ? ' width="' . $m[4] . '"' : '';
            $height = !empty($m[6]) ? ' height="' . $m[6] . '"' : '';
            // The alignment group is absent from $m when it did not match.
            $position = isset($m[7]) ? $m[7] : '';
            if ($position == '|inline') $side = 'inline';
            else if ($position == '|left') $side = 'left';
            else $side = 'right';
            $line = str_replace($m[0], '<img src="' . $panda['imgdir'] . $m[1] . '" alt="' . $m[2] . '" title="' . $m[2] . '"' . $width . $height . ' class="' . $side . '" />', $line);
        }

        // Add this line to the array of parsed lines.
        $parsed[] = $line;
    }

    // Return a single string of HTML.
    return join("\n", $parsed);
}
as you can see, it's pretty crazy, and takes a ton of debugging (the first 100 or so issues were forgotten semicolons!)
we're working on cleaning up this code, but anyone who wants it can use it, but PLEASE put a comment in the HTML of your code that says that you are using some of our stuff, thanks!
-Z
|