# markdown.awk/markdown.awk
#
# (322 lines, 5.6 KiB, Awk)

2021-06-27 18:06:41 +00:00
# Set up the parser state before the first input line is read.
BEGIN {
    # Flag initialised to 0 here; nothing in the visible script reads it.
    in_code = 0
    # Text of the Markdown block currently being accumulated by the main rule.
    body = ""
}
2021-07-10 22:45:07 +00:00
# Render a header block as an <hN> element.
#
# Two forms are recognised:
#   - ATX:    the block starts with one or more "#"; the number of "#"
#             characters is used directly as the header level (not capped).
#   - Setext: a text line followed by a line made only of "=" (h1)
#             or only of "-" (h2).
#
# Parameters:
#   str            block text to convert
#   hnum, content  locals; callers pass only str
#
# Returns the header HTML, or "" when str is neither form.
function parse_header(str,    hnum, content) {
    if (substr(str, 1, 1) == "#") {
        match(str, /^#+/);
        hnum = RLENGTH;

        content = substr(str, hnum + 1);
        # Drop the blank(s) separating the marker from the title so the
        # output is "<h1>Title</h1>" rather than "<h1> Title</h1>".
        sub(/^ +/, "", content);
        return "<h" hnum ">" parse_line(content) "</h" hnum ">";
    }
    # Inspect the block we were handed, not the global `body`: when this is
    # reached for a nested block (e.g. inside a blockquote) the two differ.
    if (match(str, /^[^\n]+\n=+$/)) {
        sub(/\n=+$/, "", str);
        return "<h1>" parse_line(str) "</h1>";
    }
    if (match(str, /^[^\n]+\n-+$/)) {
        sub(/\n-+$/, "", str);
        return "<h2>" parse_line(str) "</h2>";
    }
    return "";
}
2021-07-10 13:29:16 +00:00
# Return the text of str starting at position pos up to, but not
# including, the next newline (or up to the end of str if there is none).
#
# Parameters:
#   str       text to read from
#   pos       1-based start position
#   tail, nl  locals; callers pass only str and pos
function read_line(str, pos,    tail, nl) {
    tail = substr(str, pos);
    nl = index(tail, "\n");
    if (nl == 0)
        return tail;
    return substr(tail, 1, nl - 1);
}
2021-07-10 13:29:16 +00:00
# Locate the first occurrence of s in str at or after position i.
#
# Parameters:
#   str        text to search
#   s          substring to look for
#   i          1-based position at which the search starts
#   width, at  locals; callers pass only str, s and i
#
# Returns the 1-based position of the match, or 0 when there is none.
function find(str, s, i,    width, at) {
    width = length(s);
    at = i;
    while (at <= length(str)) {
        if (substr(str, at, width) == s)
            return at;
        at++;
    }
    return 0;
}
2021-07-10 13:29:16 +00:00
# Test whether str begins with s, allowing only spaces in front of it.
#
# Parameters:
#   str        text to inspect
#   s          prefix to look for
#   width, at  locals; callers pass only str and s
#
# Returns the 1-based position where s starts, or 0 when a character other
# than a space is met first or s is not found.
function startswith(str, s,    width, at) {
    width = length(s);
    at = 1;
    while (at <= length(str)) {
        if (substr(str, at, width) == s)
            return at;
        # Anything but a space before the prefix means "no match".
        if (substr(str, at, 1) != " ")
            break;
        at++;
    }
    return 0;
}
2021-07-10 17:13:17 +00:00
# Return str without its trailing run of spaces followed by newlines.
function rstrip(str) {
    # The pattern is anchored at the end, so a single substitution suffices.
    sub(/ *\n*$/, "", str);
    return str;
}
# Return str without its leading run of spaces followed by newlines.
function lstrip(str) {
    # The pattern is anchored at the start, so a single substitution suffices.
    sub(/^ *\n*/, "", str);
    return str;
}
2021-07-10 17:13:17 +00:00
# Concatenate two pieces of text with a separator between them.
#
# Parameters:
#   first, second  the pieces to join
#   sep            separator; a single space is used when it is empty/omitted
#
# When either piece is empty the other one is returned unchanged, so no
# dangling separator is ever produced.
function join_lines(first, second, sep) {
    if (second == "")
        return first;
    if (first == "")
        return second;
    if (sep == "")
        sep = " ";
    return first sep second;
}
# Remove the list marker from the start of a list-item line.
#
# A marker is optional leading spaces followed by either "* " (bullet) or
# one or more digits and ". " (ordered item). Exactly one marker is removed:
# the text of a bullet item that itself begins with something like "1. "
# keeps that text intact.
#
# Returns str without its marker; str is returned unchanged when it has none.
function strip_list(str) {
    # sub() returns the number of substitutions made, so a non-zero result
    # means the bullet marker was found and removed.
    if (sub(/^ *\* /, "", str))
        return str;
    sub(/^ *[[:digit:]]+\. /, "", str);
    return str;
}
# Render a Markdown list block as nested <ul>/<ol> HTML.
#
# Parameters:
#   str  the list block; items are separated by newlines, a line that does
#        not start with a marker is folded into the preceding item
#   buf, result, i, line, lines, indent, is_bullet, n
#        locals; callers pass only str
#
# Items indented deeper than the current level are collected in buf and
# rendered by a recursive call, producing a nested list inside the open <li>.
# A change of marker type at the same level closes the current list element
# and opens one of the other type.
#
# Returns the HTML for the list.
function parse_list(str,    buf, result, i, line, lines, indent, is_bullet, n) {
    result = "";
    buf = "";

    # Pass 1: fold continuation lines into the item they belong to.
    # Iterate by index: "for (i in lines)" visits elements in an unspecified
    # order, which would shuffle the items.
    n = split(str, lines, "\n");
    str = "";
    for (i = 1; i <= n; i++) {
        line = lines[i];
        # Markers are anchored so that " * " or "1. " in the middle of a
        # continuation line is not mistaken for a new item.
        if (match(line, /^ *\* /) || match(line, /^ *[[:digit:]]+\. /))
            str = join_lines(str, line, "\n");
        else
            str = join_lines(rstrip(str), lstrip(line), " ");
    }

    # Pass 2: emit one <li> per item at this level.
    n = split(str, lines, "\n");
    indent = match(str, /[^ ]/);
    is_bullet = match(str, /^ *\* /);
    if (is_bullet)
        result = "<ul>\n";
    else
        result = "<ol>\n";

    for (i = 1; i <= n; i++) {
        line = lines[i];

        # Deeper indentation: keep the line for the nested list.
        if (match(line, /[^ ]/) > indent) {
            buf = join_lines(buf, line, "\n");
            continue;
        }

        indent = match(line, /[^ ]/);
        # Flush the pending nested list into the still-open <li>.
        if (buf != "") {
            result = join_lines(result, parse_list(buf), "\n");
            buf = "";
        }
        # Close the previous item before starting a new one.
        if (i > 1)
            result = result "</li>\n";
        if (is_bullet && match(line, /^ *[[:digit:]]+\. /)) {
            is_bullet = 0;
            result = result "</ul>\n<ol>\n";
        }
        else if (is_bullet == 0 && match(line, /^ *\* /)) {
            is_bullet = 1;
            result = result "</ol>\n<ul>\n";
        }
        result = result "<li>" parse_line(strip_list(line));
    }

    if (buf != "")
        result = join_lines(result, parse_list(buf), "\n");

    result = result "</li>";
    if (is_bullet)
        result = result "\n</ul>";
    else
        result = result "\n</ol>";

    return result;
}
2021-07-10 20:04:14 +00:00
# Convert inline Markdown in str to HTML.
#
# Handled constructs:
#   **text**     -> <strong>text</strong>   (content is parsed recursively)
#   ```text```   -> <code>text</code>
#   `text`       -> <code>text</code>
#   newline      -> a single space (dropped when nothing has been output yet)
# An opening marker without a matching closing marker is emitted literally.
# "&", "<" and ">" inside code spans are replaced by HTML entities so the
# code is displayed rather than interpreted as markup.
#
# Parameters:
#   str                        text to convert
#   result, rel, i, n, c, code locals; callers pass only str
#
# Returns the HTML text.
function parse_line(str,    result, rel, i, n, c, code) {
    result = "";
    n = length(str);
    for (i = 1; i <= n; i++) {
        if (substr(str, i, 2) == "**") {
            # rel is the offset of the closing marker within the text that
            # follows the opening one (1-based), or 0 when there is none.
            rel = index(substr(str, i + 2), "**");
            if (rel != 0) {
                result = result "<strong>" parse_line(substr(str, i + 2, rel - 1)) "</strong>";
                # Land on the last char of the closing marker; the loop's
                # i++ then moves past it.
                i = i + rel + 2;
            }
            else {
                result = result "**";
                i++;
            }
        }
        else if (substr(str, i, 3) == "```") {
            rel = index(substr(str, i + 3), "```");
            if (rel != 0) {
                code = substr(str, i + 3, rel - 1);
                gsub(/&/, "\\&amp;", code);
                gsub(/</, "\\&lt;", code);
                gsub(/>/, "\\&gt;", code);
                result = result "<code>" code "</code>";
                # Skip all three closing backticks (last one at i+rel+4).
                i = i + rel + 4;
            }
            else {
                result = result "```";
                i = i + 2;
            }
        }
        else if (substr(str, i, 1) == "`") {
            rel = index(substr(str, i + 1), "`");
            if (rel != 0) {
                code = substr(str, i + 1, rel - 1);
                gsub(/&/, "\\&amp;", code);
                gsub(/</, "\\&lt;", code);
                gsub(/>/, "\\&gt;", code);
                result = result "<code>" code "</code>";
                # The closing backtick sits at i+rel.
                i = i + rel;
            }
            else {
                result = result "`";
            }
        }
        else {
            c = substr(str, i, 1);
            if (c == "\n") {
                if (length(result) > 0)
                    result = result " ";
            }
            else {
                result = result c;
            }
        }
    }

    return result;
}
2021-07-10 20:04:14 +00:00
# Render a blockquote block as <blockquote> HTML.
#
# Parameters:
#   str  the blockquote text; quoted lines start with ">", a line without
#        the marker is folded into the preceding quoted line
#   i, lines, line, buf, result, n
#        locals; callers pass only str
#
# The ">" markers are stripped and the remaining text is split at blank
# lines into inner blocks, each rendered with parse_block().
#
# Returns the HTML for the blockquote.
function parse_blockquote(str,    i, lines, line, buf, result, n) {
    # Pass 1: fold continuation lines (no ">") into the previous line.
    # Iterate by index: "for (i in lines)" visits elements in an unspecified
    # order, which would scramble the quoted text.
    n = split(str, lines, "\n");
    str = "";
    for (i = 1; i <= n; i++) {
        line = lines[i];
        if (match(line, /^>/))
            str = join_lines(str, line, "\n");
        else
            str = join_lines(rstrip(str), lstrip(line), " ");
    }

    # Pass 2: strip the markers and render each inner block.
    n = split(str, lines, "\n");
    result = "<blockquote>";
    buf = "";
    for (i = 1; i <= n; i++) {
        line = lines[i];
        sub(/^> ?/, "", line);
        if (match(line, /^ *$/)) {
            # A blank quoted line ends the current inner block.
            result = join_lines(result, parse_block(buf), "\n");
            buf = "";
        }
        else {
            buf = join_lines(buf, line, "\n");
        }
    }
    if (buf != "")
        result = join_lines(result, parse_block(buf), "\n");
    result = result "\n</blockquote>";
    return result;
}
# Render a code block as <pre><code> HTML.
#
# Two forms are recognised:
#   - fenced:   str starts and ends with "```"; the fences (and the newline
#               next to each of them) are removed
#   - indented: str starts with a space; one leading space is removed from
#               every line
#     NOTE(review): the indent patterns here are a single space; confirm
#     this is the intended indent width.
# "&", "<" and ">" in the code are replaced by HTML entities so the code is
# displayed rather than interpreted as markup.
#
# Parameters:
#   str                        the block text
#   i, lines, result, line, n  locals; callers pass only str
#
# Returns the HTML, or "" when str is neither form.
function parse_code(str,    i, lines, result, line, n) {
    # Escaping first is safe: it touches neither backticks nor spaces.
    gsub(/&/, "\\&amp;", str);
    gsub(/</, "\\&lt;", str);
    gsub(/>/, "\\&gt;", str);

    if (match(str, /^```.*```$/)) {
        # Remove the newline together with each fence so the output has no
        # stray blank line at the start, matching the indented form below.
        sub(/^```\n?/, "", str);
        sub(/\n?```$/, "", str);
        return "<pre><code>" str "</code></pre>";
    }
    if (match(str, /^ /)) {
        result = "";
        # Iterate by index: "for (i in lines)" visits elements in an
        # unspecified order, which would shuffle the code lines.
        n = split(str, lines, "\n");
        for (i = 1; i <= n; i++) {
            line = lines[i];
            sub(/^ /, "", line);
            if (i == 1)
                result = line;
            else
                result = result "\n" line;
        }
        return "<pre><code>" result "</code></pre>";
    }
    return "";
}
2021-07-10 20:04:14 +00:00
# Convert one Markdown block to HTML by dispatching on its shape.
#
# Checks are made in this order:
#   1. code        - starts with a "```" line and ends with "```", or
#                    starts with a space
#   2. header      - starts with "#", or is a text line followed by a line
#                    of "-"/"=" characters
#   3. blockquote  - starts with ">"
#   4. list        - starts with "* " or with digits followed by ". "
#   5. paragraph   - everything else
#
# Parameters:
#   str     the block text
#   header  local; callers pass only str
#
# Returns the HTML for the block, or "" for an empty block.
function parse_block(str,    header) {
    if (str == "")
        return "";
    if (match(str, /^```\n.*```$/) || match(str, /^ /))
        return parse_code(str);

    # Look at the block itself, not the global `body`: for blocks nested in
    # a blockquote the two differ.
    if (substr(str, 1, 1) == "#" || match(str, /^[^\n]+\n[-=]+$/)) {
        header = parse_header(str);
        # parse_header() yields "" when it does not accept the block (for
        # example an underline mixing "-" and "="); fall through instead of
        # silently dropping the text.
        if (header != "")
            return header;
    }
    if (substr(str, 1, 1) == ">")
        return parse_blockquote(str);
    # One or more digits, so lists starting at "10." and above are accepted.
    if (match(str, /^\* /) || match(str, /^[[:digit:]]+\. /))
        return parse_list(str);
    return "<p>" parse_line(str) "</p>";
}
# Decide whether an input line belongs to the block accumulated so far.
#
# Parameters:
#   body  text of the block collected so far
#   line  the next input line
#
# Returns 1 when line should be appended to body, 0 when body is complete
# and line starts a new block.
function line_continues(body, line) {
    # Space-indented block continues while lines stay indented.
    if (match(body, /^ /) && match(line, /^ /))
        return 1;

    # Fenced code: everything (blank lines included) belongs to the block
    # until the closing fence has been read; after that the block is done.
    # The body == "```" case covers the moment right after the opening
    # fence, before any newline has been appended.
    if (body == "```" || match(body, /^```\n/)) {
        if (match(body, /\n```$/))
            return 0;
        return 1;
    }

    # A body matching this pattern (e.g. "# Title") is complete on its own.
    if (match(body, /^#* /))
        return 0;

    # A text line already followed by its "-"/"=" underline is complete.
    if (match(body, /^[^\n]+\n[-=]+$/))
        return 0;

    # Otherwise any non-empty line continues the block; a blank line ends it.
    if (line != "")
        return 1;

    return 0;
}
# Main rule, run for every input line: either extend the block being
# accumulated in `body`, or emit the finished block and start a new one
# with the current line.
// {
    if (!line_continues(body, $0)) {
        # The current block is complete: render it, then start over.
        if (body != "")
            print parse_block(body);
        body = $0;
        next;
    }

    # Append the line, without a leading newline for the first one.
    body = (body != "") ? body "\n" $0 : $0;
    next;
}
2021-06-27 18:06:41 +00:00
# After the last input line, render whatever block is still pending.
END {
    if (length(body) > 0)
        print parse_block(body);
}