aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNicholas Johnson <nick@nicksphere.ch>2022-01-18 00:00:00 +0000
committerNicholas Johnson <nick@nicksphere.ch>2022-01-18 00:00:00 +0000
commit5179da0b877893ff81da5a63191061b63a710727d7518093eba92d88ee2e2784 (patch)
tree12dc838430dee196b748d7a3b2e90ac7e4eba37a8c60d1bf15a33931f6356631
parent610450d2dfd2737149f9b47f3570c4d96a9f0361a2b15fce4bbcaed897ccc095 (diff)
Improve and simplify text escaping
-rw-r--r--src/gemini2html.c106
1 files changed, 47 insertions, 59 deletions
diff --git a/src/gemini2html.c b/src/gemini2html.c
index c71bc58..48c5128 100644
--- a/src/gemini2html.c
+++ b/src/gemini2html.c
@@ -23,37 +23,7 @@
#include "gemini2html.h"
-/* Escapes text so it can be safely inserted within HTML tags.
- * Examples:
- * "5 < 6" => "5 &lt; 6"
- * "6 > 5" => "6 &gt; 5"
- * "me & you" => "me &amp; you"
- */
-char* escape_html_tag(const char* unescaped, const size_t size) {
- char* escaped = malloc(5 * size + 1);
- size_t escaped_pos = 0;
- char substitute[6] = "";
-
- for (size_t pos = 0; pos < size; pos++) {
- switch (unescaped[pos]) {
- case '<': strcpy(substitute, "&lt;"); break;
- case '>': strcpy(substitute, "&gt;"); break;
- case '&': strcpy(substitute, "&amp;"); break;
- default:
- strncpy(substitute, unescaped + pos, 1);
- substitute[1] = '\0';
- break;
- }
- for (size_t i = 0; i < strlen(substitute); i++) {
- escaped[escaped_pos++] = substitute[i];
- }
- }
- escaped[escaped_pos] = '\0';
-
- return escaped;
-}
-
-/* Escapes text so it can be safely inserted within HTML tags.
+/* Escapes text so it can be safely inserted within HTML tags and attributes.
* Examples:
* "5 < 6" => "5 &lt; 6"
* "6 > 5" => "6 &gt; 5"
@@ -61,29 +31,47 @@ char* escape_html_tag(const char* unescaped, const size_t size) {
* "i'm so high" => "i&apos;m so high"
* "\"hello world\"" => "&quot;hello world&quot;"
*/
-char* escape_html_attribute(const char* unescaped, const size_t size) {
+char* escape_text(const char* unescaped, const size_t size) {
+ /* In the worst case, every unescaped character expands to a 6-character escape sequence.
+ * Examples:
+ * "'" => "&apos;"
+ * """ => "&quot;"
+ *
+ * +1 for the terminating NUL byte at the end of the string
+ */
char* escaped = malloc(6 * size + 1);
+
size_t escaped_pos = 0;
- char substitute[7] = "";
-
- for (size_t pos = 0; pos < size; pos++) {
- switch (unescaped[pos]) {
- case '<': strcpy(substitute, "&lt;"); break;
- case '>': strcpy(substitute, "&gt;"); break;
- case '&': strcpy(substitute, "&amp;"); break;
- case '\'': strcpy(substitute, "&apos;"); break;
- case '\"': strcpy(substitute, "&quot;"); break;
+ for (size_t unescaped_pos = 0; unescaped_pos < size; unescaped_pos++) {
+ switch (unescaped[unescaped_pos]) {
+ case '<':
+ strncpy(escaped + escaped_pos, "&lt;", 4);
+ escaped_pos += 4;
+ break;
+ case '>':
+ strncpy(escaped + escaped_pos, "&gt;", 4);
+ escaped_pos += 4;
+ break;
+ case '&':
+ strncpy(escaped + escaped_pos, "&amp;", 5);
+ escaped_pos += 5;
+ break;
+ case '\'':
+ strncpy(escaped + escaped_pos, "&apos;", 6);
+ escaped_pos += 6;
+ break;
+ case '\"':
+ strncpy(escaped + escaped_pos, "&quot;", 6);
+ escaped_pos += 6;
+ break;
default:
- strncpy(substitute, unescaped + pos, 1);
- substitute[1] = '\0';
+ strncpy(escaped + escaped_pos, unescaped + unescaped_pos, 1);
+ escaped_pos += 1;
break;
}
- for (size_t i = 0; i < strlen(substitute); i++) {
- escaped[escaped_pos++] = substitute[i];
- }
}
- escaped[escaped_pos] = '\0';
+ escaped[escaped_pos] = '\0';
return escaped;
}
@@ -99,8 +87,8 @@ int skip_non_whitespace(const char* str, size_t* pos) {
while (str[*pos] != '\0' && !(str[*pos] == ' ' || str[*pos] == '\t')) (*pos)++;
}
-char* escape_and_add_tags(const char* opening_tag, const char* unescaped, const char* closing_tag, const size_t unescaped_size, char* (*escape_fun)(const char*, const size_t), const bool add_newline) {
- char* escaped = escape_fun(unescaped, unescaped_size);
+char* escape_and_add_tags(const char* opening_tag, const char* unescaped, const char* closing_tag, const size_t unescaped_size, const bool add_newline) {
+ char* escaped = escape_text(unescaped, unescaped_size);
size_t escaped_len = strlen(escaped);
char* escaped_with_tags = malloc(strlen(opening_tag) + escaped_len + strlen(closing_tag) + 1);
@@ -142,11 +130,11 @@ enum linetype getlinetype(const char* line, const bool pre_is_toggled) {
char* convert_text_line(const char* line, const size_t size) {
if (line[0] == '\n') return strdup("<br/>\n");
- else return escape_and_add_tags("<p>", line, "</p>", size, escape_html_tag, true);
+ else return escape_and_add_tags("<p>", line, "</p>", size, true);
}
char* convert_pre_line(const char* line, const size_t size) {
- return escape_and_add_tags("", line, "", size, escape_html_tag, true);
+ return escape_and_add_tags("", line, "", size, true);
}
char* convert_pre_toggle_line(const char* line, const size_t size) {
@@ -168,14 +156,14 @@ char* convert_link_line(const char* line, const size_t size) {
size_t before_link_text = after_link;
skip_whitespace(line, &before_link_text);
- char* escaped_href = escape_and_add_tags("<a rel=\"noreferrer noopener\" href=\"", line + before_link, "\">", after_link - before_link, escape_html_attribute, false);
+ char* escaped_href = escape_and_add_tags("<a rel=\"noreferrer noopener\" href=\"", line + before_link, "\">", after_link - before_link, false);
char* escaped_with_tags = NULL;
if (line[before_link_text] == '\0') {
- escaped_with_tags = escape_and_add_tags(escaped_href, line + before_link, "</a>", after_link - before_link, escape_html_tag, true);
+ escaped_with_tags = escape_and_add_tags(escaped_href, line + before_link, "</a>", after_link - before_link, true);
} else {
- escaped_with_tags = escape_and_add_tags(escaped_href, line + before_link_text, "</a>", size - before_link_text, escape_html_tag, true);
+ escaped_with_tags = escape_and_add_tags(escaped_href, line + before_link_text, "</a>", size - before_link_text, true);
}
free(escaped_href);
@@ -190,7 +178,7 @@ char* convert_h1_line(const char* line, const size_t size) {
skip_whitespace(line, &pos);
if (line[pos] == '\0') return strdup("<h1></h1>\n");
- else return escape_and_add_tags("<h1>", line + pos, "</h1>", size - pos, escape_html_tag, true);
+ else return escape_and_add_tags("<h1>", line + pos, "</h1>", size - pos, true);
}
char* convert_h2_line(const char* line, const size_t size) {
@@ -199,7 +187,7 @@ char* convert_h2_line(const char* line, const size_t size) {
skip_whitespace(line, &pos);
if (line[pos] == '\0') return strdup("<h2></h2>\n");
- else return escape_and_add_tags("<h2>", line + pos, "</h2>", size - pos, escape_html_tag, true);
+ else return escape_and_add_tags("<h2>", line + pos, "</h2>", size - pos, true);
}
char* convert_h3_line(const char* line, const size_t size) {
@@ -208,7 +196,7 @@ char* convert_h3_line(const char* line, const size_t size) {
skip_whitespace(line, &pos);
if (line[pos] == '\0') return strdup("<h3></h3>\n");
- else return escape_and_add_tags("<h3>", line + pos, "</h3>", size - pos, escape_html_tag, true);
+ else return escape_and_add_tags("<h3>", line + pos, "</h3>", size - pos, true);
}
char* convert_ul_line(const char* line, const size_t size) {
@@ -217,7 +205,7 @@ char* convert_ul_line(const char* line, const size_t size) {
skip_whitespace(line, &pos);
if (line[pos] == '\0') return strdup("<li></li>\n");
- else return escape_and_add_tags("<li>", line + pos, "</li>", size - pos, escape_html_tag, true);
+ else return escape_and_add_tags("<li>", line + pos, "</li>", size - pos, true);
}
char* convert_quote_line(const char* line, const size_t size) {
@@ -226,7 +214,7 @@ char* convert_quote_line(const char* line, const size_t size) {
skip_whitespace(line, &pos);
if (line[pos] == '\0') return strdup("\n");
- else return escape_and_add_tags("", line + pos, "", size - pos, escape_html_tag, true);
+ else return escape_and_add_tags("", line + pos, "", size - pos, true);
}
char* convert_line(const char* line, const size_t size, const enum linetype type) {