module nyinaa.sanitizers; /*********************************** * strip tags from an HTML string * Params: * input = the HTML string you want to strip tags from * allowedTags = an array of tag names allow in final output */ string stripTags(string input, in string[] allowedTags = []) { import std.regex : Captures, replaceAll, ctRegex; auto regex = ctRegex!(`</?(\w*)>`); string regexHandler(Captures!(string) match) { string insertSlash(in string tag) in { assert(tag.length, "Argument must contain one or more characters"); } do { return tag[0 .. 1] ~ "/" ~ tag[1 .. $]; } bool allowed = false; foreach (tag; allowedTags) { if (tag == match.hit || insertSlash(tag) == match.hit) { allowed = true; break; } } return allowed ? match.hit : ""; } return input.replaceAll!(regexHandler)(regex); } /// unittest { assert(stripTags("<html><b>bold</b></html>") == "bold"); assert(stripTags("<html><b>bold</b></html>", ["<html>"]) == "<html>bold</html>"); }