Add proper support for escape sequences

This commit is contained in:
Konstantin Nazarov 2021-07-25 21:19:17 +00:00
parent 8171fc9374
commit d96e204c3a
Signed by: knazarov
GPG key ID: 4CFE0A42FA409C22
2 changed files with 106 additions and 18 deletions

View file

@ -160,17 +160,6 @@ function is_token(str, i, tok) {
return substr(str, i, length(tok)) == tok; return substr(str, i, length(tok)) == tok;
} }
# Map a single character to its HTML-safe form.
#   char - the character to convert
# Returns "&lt;" for "<", "&gt;" for ">", "&amp;" for "&"; any other
# value is returned unchanged.
function escape_char(char) {
    return (char == "<") ? "&lt;" \
         : (char == ">") ? "&gt;" \
         : (char == "&") ? "&amp;" \
         : char;
}
function extract_html_tag(str, i, sstr) { function extract_html_tag(str, i, sstr) {
sstr=substr(str, i, length(str) - i + 1); sstr=substr(str, i, length(str) - i + 1);
@ -190,6 +179,19 @@ function is_html_tag(str, i, sstr) {
return 1; return 1;
} }
# Test whether a backslash escape sequence starts at position i of str.
#   str  - the text being scanned
#   i    - 1-based index of the character to test
#   sstr - local scratch variable (callers do not pass it)
# Returns non-zero (the match position, always 1) when the character at i
# is a backslash followed by one of the escapable punctuation characters
# listed in the bracket expression below, and 0 otherwise.
# As with any match() call, RSTART and RLENGTH are overwritten.
function is_escape_sequence(str, i, sstr) {
    # The anchored pattern only ever inspects two characters, so slice
    # exactly two instead of copying the whole tail of the string on
    # every call (this function is invoked per character by the parser).
    sstr = substr(str, i, 2);
    return match(sstr, /^\\[`\\*_{}\[\]()>#.!+-]/);
}
# Return a copy of str with the HTML-significant characters replaced by
# entities: "&" becomes "&amp;", "<" becomes "&lt;", ">" becomes "&gt;".
# All other characters are copied through unchanged.
#   str - the text to escape
#   out, n, k, c - local scratch variables (callers do not pass them)
function escape_text(str, out, n, k, c) {
    out = "";
    n = length(str);
    # Single pass over the input; each character is translated once, so
    # the "&" of a freshly emitted entity is never re-escaped.
    for (k = 1; k <= n; k++) {
        c = substr(str, k, 1);
        if (c == "&")
            out = out "&amp;";
        else if (c == "<")
            out = out "&lt;";
        else if (c == ">")
            out = out "&gt;";
        else
            out = out c;
    }
    return out;
}
function parse_line(str, result, end, i) { function parse_line(str, result, end, i) {
#print "block '" str "'" #print "block '" str "'"
result = "" result = ""
@ -211,8 +213,8 @@ function parse_line(str, result, end, i) {
else if (is_token(str, i, "```")) { else if (is_token(str, i, "```")) {
end = find(str, "```", i+3); end = find(str, "```", i+3);
if (end != 0) { if (end != 0) {
result = result "<code>" substr(str, i+3, end - i - 3) "</code>"; result = result "<code>" escape_text(substr(str, i+3, end - i - 3)) "</code>";
i = end+1; i = end+2;
} }
else { else {
result = result "```"; result = result "```";
@ -221,20 +223,30 @@ function parse_line(str, result, end, i) {
} }
else if (substr(str, i, 1) == "`") { else if (substr(str, i, 1) == "`") {
end = find(str, "`", i+1); end = find(str, "`", i+1);
if (end != 0) {
result = result "<code>" escape_text(substr(str, i+1, end - i - 1)) "</code>";
i = end+1;
}
else {
result = result "`";
}
} }
else if (is_html_tag(str, i)) { else if (is_html_tag(str, i)) {
tag = extract_html_tag(str, i); tag = extract_html_tag(str, i);
result = result tag; result = result tag;
i = i + length(tag) - 1; i = i + length(tag) - 1;
} }
else if (is_escape_sequence(str, i)) {
result = result escape_text(substr(str, i+1, 1));
i = i + 1;
}
else { else {
if (substr(str, i, 1) == "\n") { if (substr(str, i, 1) == "\n") {
if (length(result) > 0) if (length(result) > 0)
result = result " "; result = result " ";
} }
else { else {
result = result escape_char(substr(str, i, 1)); result = result escape_text(substr(str, i, 1));
} }
} }
} }
@ -284,7 +296,7 @@ function parse_code(str, i, lines, result) {
if (match(str, /^```.*```$/)) { if (match(str, /^```.*```$/)) {
gsub(/^```/, "", str); gsub(/^```/, "", str);
gsub(/\n```$/, "", str); gsub(/\n```$/, "", str);
return "<pre><code>" str "</code></pre>"; return "<pre><code>" escape_text(str) "</code></pre>";
} }
if (match(str, /^ /)) { if (match(str, /^ /)) {
result = ""; result = "";
@ -296,7 +308,7 @@ function parse_code(str, i, lines, result) {
result = result "\n" line; result = result "\n" line;
} }
gsub(/^\n/, "", result); gsub(/^\n/, "", result);
return "<pre><code>" result "</code></pre>"; return "<pre><code>" escape_text(result) "</code></pre>";
} }
return ""; return "";

78
test.sh
View file

@ -121,12 +121,16 @@ check <<-"EOF"
first line of code first line of code
second line of code second line of code
> < &
``` ```
--- ---
<pre><code> <pre><code>
first line of code first line of code
second line of code</code></pre> second line of code
&gt; &lt; &amp;</code></pre>
EOF EOF
check <<-"EOF" check <<-"EOF"
@ -138,6 +142,12 @@ second line of code
<p>``` first line of code second line of code</p> <p>``` first line of code second line of code</p>
EOF EOF
check <<-"EOF"
This is `inline code`
---
<p>This is <code>inline code</code></p>
EOF
check <<-"EOF" check <<-"EOF"
code code
indented by indented by
@ -311,5 +321,71 @@ foo <a href="" ></a> bar
<p>foo <a href="" ></a> bar</p> <p>foo <a href="" ></a> bar</p>
EOF EOF
check <<-"EOF"
\\
\`
\*
\_
\{
\}
\[
\]
\(
\)
\>
\#
\.
\!
\+
\-
---
<p>\</p>
<p>`</p>
<p>*</p>
<p>_</p>
<p>{</p>
<p>}</p>
<p>[</p>
<p>]</p>
<p>(</p>
<p>)</p>
<p>&gt;</p>
<p>#</p>
<p>.</p>
<p>!</p>
<p>+</p>
<p>-</p>
EOF
check <<-"EOF"
`This shouldn't be escaped: \*`
---
<p><code>This shouldn't be escaped: \*</code></p>
EOF
check <<-"EOF"
```
This shouldn't be escaped: \*
```
---
<pre><code>
This shouldn't be escaped: \*</code></pre>
EOF
echo echo
echo "All tests passed" echo "All tests passed"