markdown.awk/markdown.awk

249 lines
4.1 KiB
Awk
Raw Normal View History

2021-06-27 18:06:41 +00:00
# Initialise the parser state before the first input record is read.
BEGIN {
    in_code = 0    # assigned here; no rule in the visible source reads it
    body = ""      # text of the block (paragraph, list or code fence) being collected
}
2021-07-10 13:29:16 +00:00
# Render a Markdown ATX header line (e.g. "## Title") as an HTML heading.
#
#   str - header text; callers pass a string whose first character is '#'
#
# Returns "<hN>text</hN>", where N is the number of leading '#' characters
# and text is everything after them, passed through parse_block() for inline
# markup.  hnum and content are extra parameters so that they are local to
# the function instead of leaking into the global namespace.
function parse_header(str,    hnum, content) {
    # Measure the marker run on the argument itself rather than on $0, so the
    # result is correct even when str is not the current input record.
    match(str, /^#+/);
    hnum = RLENGTH;

    # substr() without a length runs to the end of the string.
    content = parse_block(substr(str, hnum + 1));
    return "<h" hnum ">" content "</h" hnum ">";
}
2021-07-10 13:29:16 +00:00
# Return the text of str starting at index pos and running up to, but not
# including, the next newline (or to the end of str if there is none).
#
#   str - string to read from
#   pos - 1-based index of the first character to return
#   res, i - locals
function read_line(str, pos, res, i) {
    res = "";
    i = pos;
    # Copy one character at a time until a newline or the end of str.
    while (i <= length(str) && substr(str, i, 1) != "\n") {
        res = res substr(str, i, 1);
        i++;
    }
    return res;
}
2021-07-10 13:29:16 +00:00
# Locate the first occurrence of s in str at or after index i.
#
#   str - string to search
#   s   - substring to look for
#   i   - 1-based index at which the search starts
#   sl, j - locals
#
# Returns the 1-based index of the match, or 0 when s does not occur.
function find(str, s, i, sl, j) {
    sl = length(s);
    # Advance j until the window starting at j equals s or j runs off the end.
    for (j = i; j <= length(str) && substr(str, j, sl) != s; j++)
        ;
    return (j <= length(str)) ? j : 0;
}
2021-07-10 13:29:16 +00:00
# Check whether str begins with s, allowing any number of leading spaces.
#
#   str - string to inspect
#   s   - prefix to look for
#   sl, j - locals
#
# Returns the 1-based index at which s starts (1 when there are no leading
# spaces), or 0 when a character other than a space precedes s or s is absent.
function startswith(str, s, sl, j) {
    sl = length(s);
    j = 1;
    while (j <= length(str)) {
        if (substr(str, j, sl) == s)
            return j;
        # Only spaces may be skipped; anything else means "no match".
        if (substr(str, j, 1) != " ")
            break;
        j++;
    }
    return 0;
}
2021-07-10 17:13:17 +00:00
# Remove every trailing space and newline from str and return the result.
#
# A single character class is used so that any mix of the two characters is
# stripped (e.g. "a \n " -> "a"); a "spaces then newlines" pattern would stop
# at the first space that follows a newline.  The pattern is anchored, so at
# most one match exists and sub() is sufficient.
function rstrip(str) {
    sub(/[ \n]+$/, "", str);
    return str;
}
# Remove every leading space and newline from str and return the result.
#
# A single character class is used so that any mix of the two characters is
# stripped (e.g. "\n a" -> "a"); a "spaces then newlines" pattern would leave
# spaces that follow a newline.  The pattern is anchored, so at most one
# match exists and sub() is sufficient.
function lstrip(str) {
    sub(/^[ \n]+/, "", str);
    return str;
}
2021-07-10 17:13:17 +00:00
# Concatenate first and second with sep between them.
#
#   first, second - the two pieces of text
#   sep           - separator; a single space is used when it is empty/omitted
#
# When either piece is empty the other one is returned unchanged, so no
# separator is ever emitted at the start or end of the result.
function join_lines(first, second, sep) {
    if (second == "")
        return first;
    if (first == "")
        return second;
    if (sep == "")
        return first " " second;
    return first sep second;
}
# Remove the leading list marker from one list-item line.
#
#   str - a line such as "* text", "  * text" or "12. text"
#
# Returns the line without its (optionally indented) bullet or number marker.
# Exactly one marker is removed: once a bullet has been stripped the numbered
# pattern is not applied, so "* 1. foo" keeps the literal "1. foo".  The
# numbered marker requires at least one digit.
function strip_list(str) {
    if (sub(/^ *\* /, "", str))
        return str;
    sub(/^ *[[:digit:]]+\. /, "", str);
    return str;
}
# Render a block of Markdown list lines as nested HTML <ul>/<ol> markup.
#
#   str - the lines of the list, separated by "\n"; an item starts with
#         optional spaces followed by "* " or "<digits>. "
#
# All remaining parameters are locals.  Returns the HTML for the list.
#
# Lines are visited with an indexed loop: the order of "for (i in array)" is
# unspecified in awk, and the algorithm depends on seeing lines in document
# order.  Marker patterns are anchored at the start of the line so that text
# merely containing " * " or " 1. " is treated as a continuation.
function parse_list(str, buf, result, i, line, lines, indent, is_bullet, n) {
    result = "";
    buf = "";

    # Pass 1: fold continuation lines into the item they belong to, so that
    # afterwards every line of str is exactly one list item.
    n = split(str, lines, "\n");
    str = "";
    for (i = 1; i <= n; i++) {
        line = lines[i];
        if (line ~ /^ *\* / || line ~ /^ *[[:digit:]]+\. /)
            str = join_lines(str, line, "\n");
        else
            str = join_lines(rstrip(str), lstrip(line), " ");
    }

    n = split(str, lines, "\n");
    indent = match(str, /[^ ]/);        # column of the first item's marker
    is_bullet = match(str, /^ *\* /);   # non-zero when the list opens with "* "

    if (is_bullet)
        result = "<ul>\n";
    else
        result = "<ol>\n";

    # Pass 2: emit items at the current indent; collect deeper-indented lines
    # in buf and render them recursively as a nested list.
    for (i = 1; i <= n; i++) {
        line = lines[i];

        if (match(line, /[^ ]/) > indent) {
            buf = join_lines(buf, line, "\n");
            continue;
        }

        indent = match(line, /[^ ]/);
        if (buf != "") {
            # Flush the pending nested list into the still-open <li>.
            result = join_lines(result, parse_list(buf), "\n");
            buf = "";
        }
        if (i > 1)
            result = result "</li>\n";   # close the previous item

        # Switch list type when the marker style changes mid-list.
        if (is_bullet && match(line, /^ *[[:digit:]]+\. /)) {
            is_bullet = 0;
            result = result "</ul>\n<ol>\n";
        }
        if (is_bullet == 0 && match(line, /^ *\* /)) {
            is_bullet = 1;
            result = result "</ol>\n<ul>\n";
        }
        result = result "<li>" parse_block(strip_list(line));
    }

    # A nested list may still be pending after the last item.
    if (buf != "")
        result = join_lines(result, parse_list(buf), "\n");

    result = result "</li>";
    if (is_bullet)
        result = result "\n</ul>";
    else
        result = result "\n</ol>";

    return result;
}
# Convert inline Markdown in str to HTML.
#
#   str - text of a paragraph, header or list item (may contain "\n")
#   result, end, i - locals
#
# Handles, in this order of precedence at each position:
#   **text**      -> <strong>text</strong>  (contents parsed recursively)
#   ```text```    -> <code>text</code>      (contents copied verbatim)
#   `text`        -> <code>text</code>      (contents copied verbatim)
#   newline       -> a single space, except at the very start of the output
# An opening delimiter without a matching closer is copied through literally.
# Returns the converted text.
function parse_block(str, result, end, i) {
    result = "";
    for (i = 1; i <= length(str); i++) {
        if (substr(str, i, 2) == "**") {
            end = find(str, "**", i + 2);
            if (end != 0) {
                result = result "<strong>" parse_block(substr(str, i + 2, end - i - 2)) "</strong>";
                i = end + 1;    # last char of the closing "**"; the loop's i++ steps past it
            }
            else {
                result = result "**";
                i++;
            }
        }
        else if (substr(str, i, 3) == "```") {
            end = find(str, "```", i + 3);
            if (end != 0) {
                result = result "<code>" substr(str, i + 3, end - i - 3) "</code>";
                i = end + 2;    # last char of the closing fence (three characters long)
            }
            else {
                result = result "```";
                i = i + 2;
            }
        }
        else if (substr(str, i, 1) == "`") {
            end = find(str, "`", i + 1);
            if (end != 0) {
                # Inline code span: emit the text between the two backticks.
                result = result "<code>" substr(str, i + 1, end - i - 1) "</code>";
                i = end;
            }
            else {
                # Unmatched backtick: keep it as literal text.
                result = result "`";
            }
        }
        else if (substr(str, i, 1) == "\n") {
            if (length(result) > 0)
                result = result " ";
        }
        else {
            result = result substr(str, i, 1);
        }
    }
    return result;
}
# Render one block of text as either a list or a paragraph.
#
#   str - the collected lines of the block, separated by "\n"
#
# A block whose first line starts with "* " or "<digits>. " is handed to
# parse_list(); one or more digits are accepted so that lists starting at
# "10." or higher are recognised too.  Anything else becomes
# "<p>...</p>" with inline markup converted by parse_block().
function parse_paragraph(str) {
    if (str ~ /^\* / || str ~ /^[[:digit:]]+\. /)
        return parse_list(str);
    return "<p>" parse_block(str) "</p>";
}
# Print the HTML for one collected block: text whose first character is '#'
# is rendered by parse_header(), everything else by parse_paragraph().
function parse_body(str) {
    if (str ~ /^#/) {
        print parse_header(str);
        return;
    }
    print parse_paragraph(str);
}
2021-07-10 13:29:16 +00:00
# A line starting with '#' is a header: flush whatever block was being
# collected, then emit the header itself.
#
# Exception: while a fenced code block is open (body starts with "```") the
# line is code, e.g. a shell comment, so it is left for the later rules to
# append to the code block instead of being turned into a heading.
/^#/ {
    if (startswith(body, "```") != 1) {
        if (body != "")
            parse_body(body);
        parse_body($0);
        body = "";
        next;
    }
    # Inside a code fence: fall through to the following rules.
}
2021-07-10 13:29:16 +00:00
# A blank line ends the block being collected, unless that block is an open
# code fence, in which case the blank line is kept as part of the code.
/^$/ {
    if (body != "") {
        if (startswith(body, "```") == 1) {
            body = body "\n";
        }
        else {
            parse_body(body);
            body = "";
        }
    }
    next;
}
2021-07-10 13:29:16 +00:00
# A line containing "```" while a code fence is open closes it: print the
# collected text after the opening three backticks as a <pre><code> block.
# When no fence is open the line falls through to the catch-all rule.
/```/ {
    if (startswith(body, "```") == 1) {
        # substr() without a length runs to the end of body.
        print "<pre><code>" substr(body, 4) "</code></pre>";
        body = "";
        next;
    }
}
# Catch-all: append the current line to the block being collected, using a
# newline as separator once the block already has content.
// {
    if ($0 != "")
        body = (body == "") ? $0 : body "\n" $0;
    next;
}
2021-06-27 18:06:41 +00:00
# At end of input, render any block that was not terminated by a blank line.
END {
    if (length(body) > 0)
        parse_body(body);
}