#!/usr/bin/awk -f
#
# Minimal Markdown-to-HTML converter.
#
# Input lines are accumulated into the global `body` until line_continues()
# reports that the current block has ended; each finished block is rendered
# by parse_block() and printed.
#
# Conventions used below:
#   * Parameters listed after the extra spaces in a signature are locals.
#   * split()'s return value is used for array sizes instead of
#     length(array), which is not available in every awk.

BEGIN {
    body = ""
    in_code = 0
}

# Render an ATX ("# Title") or setext ("Title\n====") header block.
# Returns "" when str is neither.
function parse_header(str,    hnum, content) {
    if (substr(str, 1, 1) == "#") {
        # Count the level before stripping anything, so that a bare "#"
        # still yields level 1 instead of RLENGTH == -1.
        match(str, /^#+/);
        hnum = RLENGTH;

        sub(/^#+ */, "", str);
        sub(/ *#* *$/, "", str);     # optional closing hashes
        content = parse_line(str);
        return "<h" hnum ">" content "</h" hnum ">";
    }
    # Test the argument rather than the global `body`, so that headers
    # nested inside blockquotes are handled as well.
    if (match(str, /^[^\n]+\n=+$/)) {
        sub(/\n=+$/, "", str);
        return "<h1>" parse_line(str) "</h1>";
    }
    if (match(str, /^[^\n]+\n-+$/)) {
        sub(/\n-+$/, "", str);
        return "<h2>" parse_line(str) "</h2>";
    }
    return "";
}

# Return the text of str from position pos up to (not including) the next
# newline, or up to the end of the string.
function read_line(str, pos,    res, i) {
    res = substr(str, pos);
    i = index(res, "\n");
    if (i > 0)
        return substr(res, 1, i - 1);
    return res;
}

# Return the position of the first occurrence of s in str at or after
# position i, or 0 when there is none.
function find(str, s, i,    sl, j) {
    if (i < 1)
        i = 1;
    j = index(substr(str, i), s);
    if (j == 0)
        return 0;
    return j + i - 1;
}

# Scan str from the left: return the position where s is found if only
# spaces precede it, 0 otherwise.
function startswith(str, s,    sl, j) {
    sl = length(s);
    for (j = 1; j <= length(str); j++) {
        if (substr(str, j, sl) == s)
            return j;
        if (substr(str, j, 1) != " ")
            return 0;
    }
    return 0;
}

# Remove trailing spaces followed by trailing newlines.
function rstrip(str) {
    sub(/ *\n*$/, "", str);
    return str;
}

# Remove leading spaces followed by leading newlines.
function lstrip(str) {
    sub(/^ *\n*/, "", str);
    return str;
}

# Placeholder; intentionally empty.
function escape_special() {
}

# Join two strings with sep (default: one space). An empty operand yields
# the other operand unchanged.
function join_lines(first, second, sep) {
    if (sep == "")
        sep = " ";
    if (second == "")
        return first;
    if (first == "")
        return second;
    return first sep second;
}

# Remove a single leading list marker ("- ", "+ ", "* " or "12. ").
# Only one marker is removed so that item text which itself starts with
# something marker-like is preserved.
function strip_list(str) {
    if (sub(/^[[:space:]]*[-+*][[:space:]]/, "", str))
        return str;
    sub(/^[[:space:]]*[[:digit:]]+\.[[:space:]]/, "", str);
    return str;
}

# Render a (possibly nested) list block as <ul>/<ol> markup.
# Lines indented deeper than the current item are collected and rendered
# recursively as a nested list inside that item.
function parse_list(str,    buf, result, i, ind, line, lines, indent, is_bullet, n) {
    result = "";
    buf = "";

    # Pass 1: fold lines that do not start with a marker into the item
    # that precedes them.
    n = split(str, lines, "\n");
    str = "";
    for (i = 1; i <= n; i++) {
        line = lines[i];
        if (match(line, /^[[:space:]]*[-+*][[:space:]]/) ||
            match(line, /^[[:space:]]*[[:digit:]]+\.[[:space:]]/))
            str = join_lines(str, line, "\n");
        else
            str = join_lines(rstrip(str), lstrip(line), " ");
    }

    n = split(str, lines, "\n");

    indent = match(str, /[^ ]/);
    is_bullet = match(str, /^[[:space:]]*[-+*][[:space:]]/);

    if (is_bullet)
        result = "<ul>\n";
    else
        result = "<ol>\n";

    # Pass 2: emit one <li> per item at the current indent.
    for (i = 1; i <= n; i++) {
        line = lines[i];

        if (match(line, /[^ ]/) > indent) {
            buf = join_lines(buf, line, "\n");
            continue;
        }

        indent = match(line, /[^ ]/);

        if (buf != "") {
            result = join_lines(result, parse_list(buf), "\n");
            buf = "";
        }
        if (i > 1)
            result = result "</li>\n";

        # Switch list type when the marker kind changes. The patterns are
        # anchored and mutually exclusive so item text such as
        # "- version 1. x" cannot trigger a switch.
        if (is_bullet && match(line, /^[[:space:]]*[[:digit:]]+\.[[:space:]]/)) {
            is_bullet = 0;
            result = result "</ul>\n<ol>\n";
        } else if (is_bullet == 0 && match(line, /^[[:space:]]*[-+*][[:space:]]/)) {
            is_bullet = 1;
            result = result "</ol>\n<ul>\n";
        }

        result = result "<li>" parse_line(strip_list(line));
    }

    if (buf != "")
        result = join_lines(result, parse_list(buf), "\n");
    result = result "</li>";

    if (is_bullet)
        result = result "\n</ul>";
    else
        result = result "\n</ol>";

    return result;
}

# True when tok occurs in str exactly at position i.
function is_token(str, i, tok) {
    return substr(str, i, length(tok)) == tok;
}

# Return the HTML opening or closing tag that starts at position i of str,
# or "" when there is none.
function extract_html_tag(str, i,    sstr) {
    sstr = substr(str, i, length(str) - i + 1);

    if (match(sstr, /^<\/[a-zA-Z][a-zA-Z0-9]*>/))
        return substr(str, i, RLENGTH);

    if (match(sstr, /^<[a-zA-Z][a-zA-Z0-9]*( *[a-zA-Z][a-zA-Z0-9]* *= *"[^"]*")* *>/))
        return substr(str, i, RLENGTH);

    return "";
}

# 1 when an HTML tag starts at position i of str, 0 otherwise.
function is_html_tag(str, i,    sstr) {
    if (extract_html_tag(str, i) == "")
        return 0;
    return 1;
}

# Non-zero when a backslash escape of a Markdown special character starts
# at position i of str.
function is_escape_sequence(str, i,    sstr) {
    sstr = substr(str, i, length(str) - i + 1);
    return match(sstr, /^\\[`\\*_{}\[\]()>#.!+-]/);
}

# Return the inline link `[name](url "title")` starting at position i of
# str, or "" when there is none.
function extract_link(str, i,    sstr) {
    sstr = substr(str, i, length(str) - i + 1);
    if (!match(sstr, /^\[([^\[\]]*)\]\( *([^() ]*)( +"([^"]*)")? *\)/))
        return "";
    return substr(str, i, RLENGTH);
}

# Escape text for use inside a double-quoted HTML attribute value.
function escape_attr(str) {
    str = escape_text(str);
    gsub(/"/, "\\&quot;", str);
    return str;
}

# Render a link previously returned by extract_link() as an <a> element.
# The link text is run through parse_line(); url and title are escaped
# because they end up inside attribute values.
function parse_link(str, arr,    name, url, title) {
    match(str, /^\[([^\[\]]*)\]/);
    name = substr(str, 2, RLENGTH - 2);
    sub(/^\[([^\[\]]*)\]/, "", str);
    sub(/^ *\( */, "", str);
    sub(/ *\) *$/, "", str);
    match(str, /^[^() ]*/);
    url = substr(str, 1, RLENGTH);
    sub(/^[^() ]*/, "", str);
    sub(/^ *"/, "", str);
    sub(/" *$/, "", str);
    title = str;

    if (title == "")
        return "<a href=\"" escape_attr(url) "\">" parse_line(name) "</a>";
    return "<a href=\"" escape_attr(url) "\" title=\"" escape_attr(title) "\">" parse_line(name) "</a>";
}

# Return the inline image `![alt](url "title")` starting at position i of
# str, or "" when there is none.
function extract_image(str, i,    sstr) {
    sstr = substr(str, i, length(str) - i + 1);
    if (!match(sstr, /^!\[([^\[\]]*)\]\( *([^() ]*)( +"([^"]*)")? *\)/))
        return "";
    return substr(str, i, RLENGTH);
}

# Render an image previously returned by extract_image() as an <img>
# element. All three values are escaped as attribute values.
function parse_image(str, arr,    name, url, title) {
    match(str, /^!\[([^\[\]]*)\]/);
    name = substr(str, 3, RLENGTH - 3);
    sub(/^!\[([^\[\]]*)\]/, "", str);
    sub(/^ *\( */, "", str);
    sub(/ *\) *$/, "", str);
    match(str, /^[^() ]*/);
    url = substr(str, 1, RLENGTH);
    sub(/^[^() ]*/, "", str);
    sub(/^ *"/, "", str);
    sub(/" *$/, "", str);
    title = str;

    if (title == "")
        return "<img src=\"" escape_attr(url) "\" alt=\"" escape_attr(name) "\" />";
    return "<img src=\"" escape_attr(url) "\" alt=\"" escape_attr(name) "\" title=\"" escape_attr(title) "\" />";
}

# True when an inline link starts at position i of str.
function is_link(str, i) {
    return extract_link(str, i) != "";
}

# True when an inline image starts at position i of str.
function is_image(str, i) {
    return extract_image(str, i) != "";
}

# Escape the HTML-significant characters &, < and >.
# "&" must be handled first so the entities produced for "<" and ">" are
# not escaped a second time. "\\&" yields a literal ampersand in the
# replacement (a bare "&" would mean "the matched text").
function escape_text(str) {
    gsub(/&/, "\\&amp;", str);
    gsub(/</, "\\&lt;", str);
    gsub(/>/, "\\&gt;", str);
    return str;
}

# Return the emphasis span (*x*, **x**, ***x***, _x_, __x__, ___x___)
# starting at position i of str, or "" when there is none.
function extract_emphasis(str, i,    sstr) {
    sstr = substr(str, i, length(str) - i + 1);
    if (match(sstr, /^\*[^\*]+\*/) ||
        match(sstr, /^\*\*[^\*]+\*\*/) ||
        match(sstr, /^\*\*\*[^\*]+\*\*\*/) ||
        match(sstr, /^_[^_]+_/) ||
        match(sstr, /^__[^_]+__/) ||
        match(sstr, /^___[^_]+___/))
        return substr(str, i, RLENGTH);
    return "";
}

# Render an emphasis span returned by extract_emphasis().
# The nesting level is the number of leading repetitions of the FIRST
# marker character (at most 3), so "_*a*_" is one level of "_" wrapping an
# inner "*a*" rather than a two-character marker.
function parse_emphasis(str, i,    num, c) {
    c = substr(str, 1, 1);
    num = 0;
    while (num < 3 && substr(str, num + 1, 1) == c)
        num++;

    if (num == 1)
        return "<em>" parse_line(substr(str, 2, length(str) - 2)) "</em>";
    if (num == 2)
        return "<strong>" parse_line(substr(str, 3, length(str) - 4)) "</strong>";
    if (num == 3)
        return "<strong><em>" parse_line(substr(str, 4, length(str) - 6)) "</em></strong>";
    return "";
}

# Render inline Markdown (emphasis, code spans, raw HTML tags, backslash
# escapes, links, images) in str. Newlines become single spaces.
# All scratch variables are locals: this function is re-entered through
# parse_emphasis() and parse_link(), and shared globals would be clobbered
# by the inner call.
function parse_line(str,    result, end, i, c, emphasis, tag, link, image) {
    result = "";

    for (i = 1; i <= length(str); i++) {
        c = substr(str, i, 1);

        if ((c == "*" || c == "_") && (emphasis = extract_emphasis(str, i)) != "") {
            result = result parse_emphasis(emphasis);
            i = i + length(emphasis) - 1;
        }
        else if (c == "`" && is_token(str, i, "```")) {
            end = find(str, "```", i + 3);
            if (end != 0) {
                result = result "<code>" escape_text(substr(str, i + 3, end - i - 3)) "</code>";
                i = end + 2;
            }
            else {
                # Unterminated: emit the backticks literally.
                result = result "```";
                i = i + 2;
            }
        }
        else if (c == "`") {
            end = find(str, "`", i + 1);
            if (end != 0) {
                result = result "<code>" escape_text(substr(str, i + 1, end - i - 1)) "</code>";
                i = end;
            }
            else {
                result = result "`";
            }
        }
        else if (c == "<" && (tag = extract_html_tag(str, i)) != "") {
            # Raw HTML tags are passed through unescaped.
            result = result tag;
            i = i + length(tag) - 1;
        }
        else if (c == "\\" && is_escape_sequence(str, i)) {
            result = result escape_text(substr(str, i + 1, 1));
            i = i + 1;
        }
        else if (c == "[" && (link = extract_link(str, i)) != "") {
            result = result parse_link(link);
            i = i + length(link) - 1;
        }
        else if (c == "!" && (image = extract_image(str, i)) != "") {
            result = result parse_image(image);
            i = i + length(image) - 1;
        }
        else if (c == "\n") {
            if (length(result) > 0)
                result = result " ";
        }
        else {
            result = result escape_text(c);
        }
    }

    return result;
}

# Render a blockquote block. Lines that do not start with ">" are folded
# into the preceding quoted line; the unquoted text is then rendered
# recursively through parse_body().
function parse_blockquote(str,    i, lines, line, buf, result, n) {
    n = split(str, lines, "\n");

    str = "";
    for (i = 1; i <= n; i++) {
        line = lines[i];
        if (match(line, /^>/))
            str = join_lines(str, line, "\n");
        else
            str = join_lines(rstrip(str), lstrip(line), " ");
    }

    n = split(str, lines, "\n");

    result = "<blockquote>";
    buf = "";
    for (i = 1; i <= n; i++) {
        line = lines[i];
        sub(/^> ?/, "", line);
        if (buf != "")
            buf = buf "\n" line;
        else
            buf = line;
    }

    if (buf != "")
        result = join_lines(result, parse_body(buf), "\n");

    result = result "\n</blockquote>";
    return result;
}

# Render a fenced (```) or four-space-indented code block.
# Returns "" when str is neither.
function parse_code(str,    i, lines, result, line, n) {
    if (match(str, /^```[^`\n]*\n(.*\n)?```$/)) {
        sub(/^```[^`\n]*\n/, "", str);   # opening fence line (incl. info string)
        sub(/\n?```$/, "", str);         # closing fence
        return "<pre><code>" escape_text(str) "</code></pre>";
    }
    if (match(str, /^    /)) {
        result = "";
        n = split(str, lines, "\n");
        for (i = 1; i <= n; i++) {
            line = lines[i];
            sub(/^    /, "", line);
            result = result "\n" line;
        }
        sub(/^\n/, "", result);
        sub(/\n+$/, "", result);         # blank lines absorbed after the block
        return "<pre><code>" escape_text(result) "</code></pre>";
    }
    return "";
}

# 1 when str consists solely of three or more "*", "-" or "_" characters
# (optionally separated by whitespace), i.e. a horizontal rule.
function is_hrule(str,    tmp) {
    tmp = str;
    gsub(/[[:space:]]/, "", tmp);
    if (tmp ~ /^(\*\*\*+|---+|___+)$/)
        return 1;
    return 0;
}

# 1 when str begins with a code-fence line: three backticks, an optional
# info string without backticks, then end of line.
function starts_fence(str) {
    if (str ~ /^```[^`\n]*$/ || str ~ /^```[^`\n]*\n/)
        return 1;
    return 0;
}

# Render one complete block (code, header, blockquote, horizontal rule,
# list or paragraph) as HTML.
function parse_block(str) {
    if (str == "")
        return "";

    if (match(str, /^```[^`\n]*\n(.*\n)?```$/) || match(str, /^    /))
        return parse_code(str);

    if (substr(str, 1, 1) == "#" || match(str, /^[^\n]+\n[-=]+$/))
        return parse_header(str);

    if (substr(str, 1, 1) == ">")
        return parse_blockquote(str);

    # Must precede the list test: "* * *" is a rule, not a list.
    if (is_hrule(str))
        return "<hr />";

    if (match(str, /^[-+*][[:space:]]/) || match(str, /^[[:digit:]]+\.[[:space:]]/))
        return parse_list(str);

    return "<p>" parse_line(str) "</p>";
}

# Render a multi-line string consisting of several blocks; used for nested
# content such as blockquote bodies. Mirrors the main rule below.
function parse_body(str,    body, line, lines, result, i, n) {
    n = split(str, lines, "\n");

    result = "";
    body = "";

    for (i = 1; i <= n; i++) {
        line = lines[i];
        if (line_continues(body, line)) {
            if (body != "")
                body = body "\n" line;
            else
                body = line;
        } else {
            if (body != "")
                result = join_lines(result, parse_block(body), "\n");
            # The terminating line starts the next block; it must not be
            # discarded.
            body = line;
        }
    }

    if (body != "")
        result = join_lines(result, parse_block(body), "\n");

    return result;
}

# Decide whether `line` belongs to the block accumulated so far in `body`.
# Returns 1 to append the line, 0 to finish the block.
function line_continues(body, line) {
    if (body == "")
        return (line != "") ? 1 : 0;

    # Indented code: continues over blank and indented lines only.
    if (match(body, /^    /))
        return (line == "" || match(line, /^    /)) ? 1 : 0;

    # Fenced code: continues (blank lines included) until the closing fence.
    if (starts_fence(body))
        return match(body, /\n```$/) ? 0 : 1;

    # ATX headers are always a single line.
    if (substr(body, 1, 1) == "#")
        return 0;

    # A completed setext header.
    if (match(body, /^[^\n]+\n[-=]+$/))
        return 0;

    # A horizontal rule is always a single line.
    if (is_hrule(body))
        return 0;

    if (line != "")
        return 1;

    return 0;
}

# Main rule: accumulate input lines into `body`, emitting each block as
# soon as a line arrives that does not continue it.
{
    if (line_continues(body, $0)) {
        if (body != "")
            body = body "\n" $0;
        else
            body = $0;
        next;
    }

    if (body != "")
        print parse_block(body);

    body = $0;
    next;
}

END {
    if (body != "")
        print parse_block(body);
}