diff options
author | Nicholas Johnson <nick@nicksphere.ch> | 2022-01-18 00:00:00 +0000 |
---|---|---|
committer | Nicholas Johnson <nick@nicksphere.ch> | 2022-01-18 00:00:00 +0000 |
commit | 5179da0b877893ff81da5a63191061b63a710727d7518093eba92d88ee2e2784 (patch) | |
tree | 12dc838430dee196b748d7a3b2e90ac7e4eba37a8c60d1bf15a33931f6356631 | |
parent | 610450d2dfd2737149f9b47f3570c4d96a9f0361a2b15fce4bbcaed897ccc095 (diff) |
Improve and simplify text escaping
-rw-r--r-- | src/gemini2html.c | 106 |
1 files changed, 47 insertions, 59 deletions
diff --git a/src/gemini2html.c b/src/gemini2html.c index c71bc58..48c5128 100644 --- a/src/gemini2html.c +++ b/src/gemini2html.c @@ -23,37 +23,7 @@ #include "gemini2html.h" -/* Escapes text so it can be safely inserted within HTML tags. - * Examples: - * "5 < 6" => "5 &lt; 6" - * "6 > 5" => "6 &gt; 5" - * "me & you" => "me &amp; you" - */ -char* escape_html_tag(const char* unescaped, const size_t size) { - char* escaped = malloc(5 * size + 1); - size_t escaped_pos = 0; - char substitute[6] = ""; - - for (size_t pos = 0; pos < size; pos++) { - switch (unescaped[pos]) { - case '<': strcpy(substitute, "&lt;"); break; - case '>': strcpy(substitute, "&gt;"); break; - case '&': strcpy(substitute, "&amp;"); break; - default: - strncpy(substitute, unescaped + pos, 1); - substitute[1] = '\0'; - break; - } - for (size_t i = 0; i < strlen(substitute); i++) { - escaped[escaped_pos++] = substitute[i]; - } - } - escaped[escaped_pos] = '\0'; - - return escaped; -} - -/* Escapes text so it can be safely inserted within HTML tags. +/* Escapes text so it can be safely inserted within HTML tags and attributes. * Examples: * "5 < 6" => "5 &lt; 6" * "6 > 5" => "6 &gt; 5" @@ -61,29 +31,47 @@ char* escape_html_tag(const char* unescaped, const size_t size) { * "i'm so high" => "I'm so high" * ""hello world" => "&quot;hello world&quot;" */ -char* escape_html_attribute(const char* unescaped, const size_t size) { +char* escape_text(const char* unescaped, const size_t size) { + /* In the worst case, every unescaped character corresponds to 6 escape characters. 
+ * Examples: + * "'" => "'" + * """ => "&quot;" + * + * +1 for the NULL byte at the end of the string + */ char* escaped = malloc(6 * size + 1); + size_t escaped_pos = 0; - char substitute[7] = ""; - - for (size_t pos = 0; pos < size; pos++) { - switch (unescaped[pos]) { - case '<': strcpy(substitute, "&lt;"); break; - case '>': strcpy(substitute, "&gt;"); break; - case '&': strcpy(substitute, "&amp;"); break; - case '\'': strcpy(substitute, "'"); break; - case '\"': strcpy(substitute, "&quot;"); break; + for (size_t unescaped_pos = 0; unescaped_pos < size; unescaped_pos++) { + switch (unescaped[unescaped_pos]) { + case '<': + strncpy(escaped + escaped_pos, "&lt;", 4); + escaped_pos += 4; + break; + case '>': + strncpy(escaped + escaped_pos, "&gt;", 4); + escaped_pos += 4; + break; + case '&': + strncpy(escaped + escaped_pos, "&amp;", 5); + escaped_pos += 5; + break; + case '\'': + strncpy(escaped + escaped_pos, "'", 6); + escaped_pos += 6; + break; + case '\"': + strncpy(escaped + escaped_pos, "&quot;", 6); + escaped_pos += 6; + break; default: - strncpy(substitute, unescaped + pos, 1); - substitute[1] = '\0'; + strncpy(escaped + escaped_pos, unescaped + unescaped_pos, 1); + escaped_pos += 1; break; } - for (size_t i = 0; i < strlen(substitute); i++) { - escaped[escaped_pos++] = substitute[i]; - } } - escaped[escaped_pos] = '\0'; + escaped[escaped_pos] = '\0'; return escaped; } @@ -99,8 +87,8 @@ int skip_non_whitespace(const char* str, size_t* pos) { while (str[*pos] != '\0' && !(str[*pos] == ' ' || str[*pos] == '\t')) (*pos)++; } -char* escape_and_add_tags(const char* opening_tag, const char* unescaped, const char* closing_tag, const size_t unescaped_size, char* (*escape_fun)(const char*, const size_t), const bool add_newline) { - char* escaped = escape_fun(unescaped, unescaped_size); 
size_t escaped_len = strlen(escaped); char* escaped_with_tags = malloc(strlen(opening_tag) + escaped_len + strlen(closing_tag) + 1); @@ -142,11 +130,11 @@ enum linetype getlinetype(const char* line, const bool pre_is_toggled) { char* convert_text_line(const char* line, const size_t size) { if (line[0] == '\n') return strdup("<br/>\n"); - else return escape_and_add_tags("<p>", line, "</p>", size, escape_html_tag, true); + else return escape_and_add_tags("<p>", line, "</p>", size, true); } char* convert_pre_line(const char* line, const size_t size) { - return escape_and_add_tags("", line, "", size, escape_html_tag, true); + return escape_and_add_tags("", line, "", size, true); } char* convert_pre_toggle_line(const char* line, const size_t size) { @@ -168,14 +156,14 @@ char* convert_link_line(const char* line, const size_t size) { size_t before_link_text = after_link; skip_whitespace(line, &before_link_text); - char* escaped_href = escape_and_add_tags("<a rel=\"noreferrer noopener\" href=\"", line + before_link, "\">", after_link - before_link, escape_html_attribute, false); + char* escaped_href = escape_and_add_tags("<a rel=\"noreferrer noopener\" href=\"", line + before_link, "\">", after_link - before_link, false); char* escaped_with_tags = NULL; if (line[before_link_text] == '\0') { - escaped_with_tags = escape_and_add_tags(escaped_href, line + before_link, "</a>", after_link - before_link, escape_html_tag, true); + escaped_with_tags = escape_and_add_tags(escaped_href, line + before_link, "</a>", after_link - before_link, true); } else { - escaped_with_tags = escape_and_add_tags(escaped_href, line + before_link_text, "</a>", size - before_link_text, escape_html_tag, true); + escaped_with_tags = escape_and_add_tags(escaped_href, line + before_link_text, "</a>", size - before_link_text, true); } free(escaped_href); @@ -190,7 +178,7 @@ char* convert_h1_line(const char* line, const size_t size) { skip_whitespace(line, &pos); if (line[pos] == '\0') return 
strdup("<h1></h1>\n"); - else return escape_and_add_tags("<h1>", line + pos, "</h1>", size - pos, escape_html_tag, true); + else return escape_and_add_tags("<h1>", line + pos, "</h1>", size - pos, true); } char* convert_h2_line(const char* line, const size_t size) { @@ -199,7 +187,7 @@ char* convert_h2_line(const char* line, const size_t size) { skip_whitespace(line, &pos); if (line[pos] == '\0') return strdup("<h2></h2>\n"); - else return escape_and_add_tags("<h2>", line + pos, "</h2>", size - pos, escape_html_tag, true); + else return escape_and_add_tags("<h2>", line + pos, "</h2>", size - pos, true); } char* convert_h3_line(const char* line, const size_t size) { @@ -208,7 +196,7 @@ char* convert_h3_line(const char* line, const size_t size) { skip_whitespace(line, &pos); if (line[pos] == '\0') return strdup("<h3></h3>\n"); - else return escape_and_add_tags("<h3>", line + pos, "</h3>", size - pos, escape_html_tag, true); + else return escape_and_add_tags("<h3>", line + pos, "</h3>", size - pos, true); } char* convert_ul_line(const char* line, const size_t size) { @@ -217,7 +205,7 @@ char* convert_ul_line(const char* line, const size_t size) { skip_whitespace(line, &pos); if (line[pos] == '\0') return strdup("<li></li>\n"); - else return escape_and_add_tags("<li>", line + pos, "</li>", size - pos, escape_html_tag, true); + else return escape_and_add_tags("<li>", line + pos, "</li>", size - pos, true); } char* convert_quote_line(const char* line, const size_t size) { @@ -226,7 +214,7 @@ char* convert_quote_line(const char* line, const size_t size) { skip_whitespace(line, &pos); if (line[pos] == '\0') return strdup("\n"); - else return escape_and_add_tags("", line + pos, "", size - pos, escape_html_tag, true); + else return escape_and_add_tags("", line + pos, "", size - pos, true); } char* convert_line(const char* line, const size_t size, const enum linetype type) { |