123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576 |
- /*
- HTML Clean for jQuery
- Anthony Johnston
- http://www.antix.co.uk
-
- version 1.3.1
- $Revision$
- requires jQuery http://jquery.com
- Use and distribution http://www.opensource.org/licenses/bsd-license.php
- 2010-04-02 allowedTags/removeTags added (white/black list) thanks to David Wartian (Dwartian)
- 2010-06-30 replaceStyles added for replacement of bold, italic, super and sub styles on a tag
- 2012-04-30 allowedAttributes added, an array of attributes allowed on the elements
- 2013-02-25 now will push non-inline elements up the stack if nested in an inline element
- 2013-02-25 comment element support added, removed by default, see AllowComments in options
- */
- (function ($) {
// jQuery plugin entry point: html clean each matched element in place.
//
// options - optional overrides, passed straight through to $.htmlClean
//
// An element with a truthy `value` has that value cleaned; every other
// element has its innerHTML cleaned instead.
// Returns the result of this.each(), so calls can be chained.
$.fn.htmlClean = function (options) {
    return this.each(function () {
        if (this.value) {
            this.value = $.htmlClean(this.value, options);
        } else {
            this.innerHTML = $.htmlClean(this.innerHTML, options);
        }
    });
};
// Clean the passed html and return the cleaned markup as a string.
//
// html    - the markup to clean (a string)
// options - optional overrides, merged over $.htmlClean.defaults
//
// The markup is tokenised with tagsRE into a tree of Element objects hanging
// off a synthetic root element. While the tree is built, style and tag
// replacements are applied, attributes are filtered against the allowed list
// of each tag, and the nesting rules (inline/block, disallowNest,
// requiredParent) decide where each element is attached. The tree is then
// serialised by render() and trimmed.
$.htmlClean = function (html, options) {
    options = $.extend({}, $.htmlClean.defaults, options);

    // capture groups: 1 whole tag, 2 closing slash, 3 namespace prefix,
    // 4 tag name, 5 raw attributes, 6 comment content (with trailing "--")
    var tagsRE = /(<(\/)?(\w+:)?([\w]+)([^>]*)>)|<!--(.*?--)>/gi;
    var attrsRE = /([\w\-\:]+)=(".*?"|'.*?'|[^\s>]*)/gi;

    // appended so that the last element/text is always followed by a tag
    var sentinel = "<xxx>";

    var tagMatch;
    var root = new Element();
    var stack = [root];
    var container = root;

    if (options.bodyOnly) {
        // check for body tag; [\s\S] also matches "\r" and other line
        // terminators, which "." does not
        var bodyMatch = /<body[^>]*>([\s\S]*)<\/body>/i.exec(html);
        if (bodyMatch) {
            html = bodyMatch[1];
        }
    }
    html = html.concat(sentinel);

    var lastIndex = 0;
    while ((tagMatch = tagsRE.exec(html))) {
        var tag = tagMatch[6]
            ? new Tag("--", null, tagMatch[6], options)
            : new Tag(tagMatch[4], tagMatch[2], tagMatch[5], options);

        // add the text found between the previous tag and this one
        var text = html.substring(lastIndex, tagMatch.index);
        if (text.length > 0) {
            var lastChildIndex = container.children.length - 1;
            if (lastChildIndex >= 0 && isText(container.children[lastChildIndex])) {
                // merge with the preceding text node
                container.children[lastChildIndex] = container.children[lastChildIndex].concat(text);
            } else {
                container.children.push(text);
            }
        }
        lastIndex = tagsRE.lastIndex;

        if (tag.isClosing) {
            // find matching container
            if (popToTagName(stack, [tag.name])) {
                // the synthetic root always sits at index 0 and must stay on
                // the stack, even if the input contains a stray "</root>"
                if (stack.length > 1) {
                    stack.pop();
                }
                container = stack[stack.length - 1];
            }
            continue;
        }

        // create a new element
        var element = new Element(tag);

        // add attributes
        var attrMatch;
        while ((attrMatch = attrsRE.exec(tag.rawAttributes))) {
            // check style attribute and do replacements
            if (attrMatch[1].toLowerCase() == "style"
                && options.replaceStyles) {
                var renderParent = !tag.isInline;
                for (var i = 0; i < options.replaceStyles.length; i++) {
                    if (options.replaceStyles[i][0].test(attrMatch[2])) {
                        if (!renderParent) {
                            // an inline tag is replaced by the first matching style tag
                            tag.render = false;
                            renderParent = true;
                        }
                        container.children.push(element); // assumes not replaced
                        stack.push(element);
                        container = element; // assumes replacement is a container
                        // create new tag and element
                        tag = new Tag(options.replaceStyles[i][1], "", "", options);
                        element = new Element(tag);
                    }
                }
            }
            if (tag.allowedAttributes != null
                && (tag.allowedAttributes.length == 0
                    || $.inArray(attrMatch[1], tag.allowedAttributes) > -1)) {
                element.attributes.push(new Attribute(attrMatch[1], attrMatch[2]));
            }
        }

        // add required empty ones
        $.each(tag.requiredAttributes, function () {
            var name = this.toString();
            if (!element.hasAttribute(name)) element.attributes.push(new Attribute(name, ""));
        });

        // check for replacements
        for (var repIndex = 0; repIndex < options.replace.length; repIndex++) {
            var candidates = options.replace[repIndex][0];
            for (var tagIndex = 0; tagIndex < candidates.length; tagIndex++) {
                var byName = typeof (candidates[tagIndex]) == "string";
                // a regex is tested against the matched tag text (tag and attributes)
                if ((byName && candidates[tagIndex] == tag.name)
                    || (!byName && candidates[tagIndex].test(tagMatch[0]))) {
                    // set the name to the replacement
                    tag.rename(options.replace[repIndex][1]);
                    repIndex = options.replace.length; // break out of both loops
                    break;
                }
            }
        }

        // check container rules
        var add = true;
        if (!container.isRoot) {
            if (container.tag.isInline && !tag.isInline) {
                // push non-inline elements up out of inline containers
                add = popToContainer(stack);
                if (add) {
                    container = stack[stack.length - 1];
                }
            } else if (container.tag.disallowNest && tag.disallowNest
                && !tag.requiredParent) {
                add = false;
            } else if (tag.requiredParent) {
                add = popToTagName(stack, tag.requiredParent);
                if (add) {
                    container = stack[stack.length - 1];
                }
            }
        }
        if (!add) {
            continue;
        }

        container.children.push(element);
        if (tag.toProtect) {
            // skip to the closing tag, keeping everything in between verbatim
            var closed = false;
            var tagMatch2;
            while ((tagMatch2 = tagsRE.exec(html))) {
                // comment matches have no tag name (group 4) and are skipped;
                // the closing tag in the source carries the original name
                if (tagMatch2[4] && tagMatch2[2]
                    && tagMatch2[4].toLowerCase() == tag.nameOriginal) {
                    element.children.push(html.substring(lastIndex, tagMatch2.index));
                    lastIndex = tagsRE.lastIndex;
                    closed = true;
                    break;
                }
            }
            if (!closed) {
                // no closing tag: the rest of the input (minus the sentinel) is
                // the protected content. exec() has returned null and reset
                // lastIndex to 0, so stop here rather than re-scan the input.
                element.children.push(html.substring(lastIndex, html.length - sentinel.length));
                break;
            }
        } else if (!tag.isSelfClosing && !tag.isNonClosing) {
            // set as current container element
            stack.push(element);
            container = element;
        }
    }

    // render doc
    return $.htmlClean.trim(render(root, options).join(""));
};
// default options, merged under the options passed to $.htmlClean
$.htmlClean.defaults = {
    // when true, only the content between <body> and </body> is cleaned (if a body tag is found)
    bodyOnly: true,

    // white list: when not empty, only these tags are rendered, contents are still rendered
    allowedTags: [],

    // black list: these tags are not rendered, contents are still rendered
    removeTags: [
        "basefont", "center", "dir", "font", "frame", "frameset", "iframe",
        "isindex", "menu", "noframes", "s", "strike", "u"
    ],

    // extra attributes to allow, each entry is [attributeName] or [attributeName, [tag names]]
    // e.g. [["id"], ["style", ["p", "dl"]]] allows id on all elements and style on 'p' and 'dl'
    allowedAttributes: [],

    // attribute names which are never rendered, whatever the element, e.g. ["width", "height"]
    removeAttrs: [],

    // classes to keep, each entry is [className] or [className, [tag names]]
    // e.g. [["aClass"], ["anotherClass", ["p", "dl"]]], no filtering is done when empty
    allowedClasses: [],

    // format the result with line breaks and tabs
    format: false,

    // indent level the formatting starts on
    formatIndent: 0,

    // tags to replace: [[tag names or regexes to match], replacement tag name]
    replace: [
        [["b", "big"], "strong"],
        [["i"], "em"]
    ],

    // styles to replace with tags: [regex tested on the style value, tag name]
    // multiple matches are supported, an inline tag is replaced by the first match, blocks are retained
    replaceStyles: [
        [/font-weight:\s*bold/i, "strong"],
        [/font-style:\s*italic/i, "em"],
        [/vertical-align:\s*super/i, "sup"],
        [/vertical-align:\s*sub/i, "sub"]
    ],

    // comments are only rendered when this is true
    allowComments: false
};
// Append a line break and `indent` tab characters to output, ready for the
// markup of a non-inline element.
//
// element - element about to be written; nothing is added when its tag is inline
// options - not used here, kept so that the call sites stay unchanged
// output  - array of string fragments, appended to in place; nothing is
//           added while it is still empty
// indent  - number of tabs to add; a value which is not a positive number adds none
function applyFormat(element, options, output, indent) {
    if (element.tag.isInline || output.length === 0) {
        return;
    }
    output.push("\n");
    // the counter is declared locally, it must not leak into the global scope
    for (var i = 0; i < indent; i++) {
        output.push("\t");
    }
}
// Render an element and its children to an array of string fragments.
//
// element - the Element to render, the root element renders its children only
// options - the merged options; options.formatIndent is used as the running
//           indent level and is restored before returning
//
// Returns the fragments, or an empty array when the element turns out to be
// empty and its tag does not allow that.
function render(element, options) {
    var output = [];
    var empty = element.attributes.length == 0;
    var indent;
    var outputChildren;

    if (element.tag.isComment) {
        if (options.allowComments) {
            output.push("<!--");
            output.push(element.tag.rawAttributes);
            output.push(">");
            // no indent level is tracked for comments, so indent is still undefined here
            if (options.format) applyFormat(element, options, output, indent - 1);
        }
        return output;
    }

    // don't render if not in allowedTags or in removeTags
    var renderTag
        = element.tag.render
        && (options.allowedTags.length == 0 || $.inArray(element.tag.name, options.allowedTags) > -1)
        && (options.removeTags.length == 0 || $.inArray(element.tag.name, options.removeTags) == -1);
    var renderMarkup = !element.isRoot && renderTag;

    if (renderMarkup) {
        // render opening tag
        output.push("<");
        output.push(element.tag.name);
        $.each(element.attributes, function () {
            if ($.inArray(this.name, options.removeAttrs) != -1) {
                return;
            }
            // split the raw value into its opening quote (if any) and content
            var m = /^(['"]?)(.*?)['"]?$/.exec(this.value);
            var value = m ? m[2] : this.value;
            var wasQuoted = !!(m && m[1]);
            var valueQuote = wasQuoted ? m[1] : "'";

            // check for classes allowed
            if (this.name == "class" && options.allowedClasses.length > 0) {
                value =
                    $.grep(value.split(" "), function (c) {
                        return $.grep(options.allowedClasses, function (a) {
                            return a == c
                                || (a[0] == c && (a.length == 1 || $.inArray(element.tag.name, a[1]) > -1));
                        }).length > 0;
                    })
                    .join(" ");
            }

            if (value != null && (value.length > 0 || $.inArray(this.name, element.tag.requiredAttributes) > -1)) {
                // an unquoted value is wrapped in single quotes, so any single
                // quote inside it has to be escaped to keep the markup intact
                if (!wasQuoted) {
                    value = value.replace(/'/g, "&#39;");
                }
                output.push(" ");
                output.push(this.name);
                output.push("=");
                output.push(valueQuote);
                output.push(value);
                output.push(valueQuote);
            }
        });
    }

    if (element.tag.isSelfClosing) {
        // self closing
        if (renderTag) output.push(" />");
        empty = false;
    } else if (element.tag.isNonClosing) {
        empty = false;
    } else {
        if (renderMarkup) {
            // close the opening tag
            output.push(">");
        }

        indent = options.formatIndent++;

        // render children
        if (element.tag.toProtect) {
            // protected content is kept as is, apart from trimming and <br> to line break
            outputChildren = $.htmlClean.trim(element.children.join("")).replace(/<br>/ig, "\n");
            output.push(outputChildren);
            empty = outputChildren.length == 0;
        } else {
            outputChildren = [];
            for (var i = 0; i < element.children.length; i++) {
                var child = element.children[i];
                var text = $.htmlClean.trim(textClean(isText(child) ? child : child.childrenToString()));
                if (isInline(child)) {
                    // keep a single space between inline siblings which had white space between them
                    if (i > 0 && text.length > 0
                        && (startsWithWhitespace(child) || endsWithWhitespace(element.children[i - 1]))) {
                        outputChildren.push(" ");
                    }
                }
                if (isText(child)) {
                    if (text.length > 0) {
                        outputChildren.push(text);
                    }
                } else {
                    // don't allow a break to be the last child
                    if (i != element.children.length - 1 || child.tag.name != "br") {
                        if (options.format) applyFormat(child, options, outputChildren, indent);
                        outputChildren = outputChildren.concat(render(child, options));
                    }
                }
            }
            if (outputChildren.length > 0) {
                if (options.format && outputChildren[0] != "\n") applyFormat(element, options, output, indent);
                output = output.concat(outputChildren);
                empty = false;
            }
        }

        // restore the indent level for both kinds of content, so protected
        // elements do not push the following siblings further in
        options.formatIndent--;

        if (renderMarkup) {
            // render the closing tag
            if (options.format) applyFormat(element, options, output, indent - 1);
            output.push("</");
            output.push(element.tag.name);
            output.push(">");
        }
    }

    // check for empty tags
    if (!element.tag.allowEmpty && empty) { return []; }

    return output;
}
// pop the stack back to the nearest element whose original tag name is one
// of tagNameArray; returns whether one was found, if not the stack is left alone
function popToTagName(stack, tagNameArray) {
    var hasWantedName = function (element) {
        return $.inArray(element.tag.nameOriginal, tagNameArray) > -1;
    };
    return pop(stack, hasWantedName);
}
// pop the stack back to the nearest element which can contain non-inline
// elements, that is the root or any element whose tag is not inline
function popToContainer(stack) {
    function canContainBlocks(element) {
        if (element.isRoot) {
            return true;
        }
        return !element.tag.isInline;
    }
    return pop(stack, canContainBlocks);
}
// Search the stack from the top down for the first element passing test,
// and remove everything above it.
//
// stack - array used as a stack, the top is the last item
// test  - function (element), returns truthy for the wanted element
// index - optional 1-based distance from the top to start searching at, default 1
//
// Returns true when an element was found (it is then `index` items from the
// top), false when none was, in which case the stack is not changed.
function pop(stack, test, index) {
    var start = index || 1;
    var depth = start;

    // all the testing is done before anything is removed
    while (!test(stack[stack.length - depth])) {
        if (stack.length - depth <= 0) {
            return false;
        }
        depth++;
    }

    for (var remaining = depth - start; remaining > 0; remaining--) {
        stack.pop();
    }
    return true;
}
// Element object, a node of the parsed tree.
// Created without a tag it becomes the root element, with a tag named "root".
function Element(tag) {
    this.tag = tag ? tag : new Tag("root");
    this.isRoot = !tag;
    this.attributes = []; // Attribute objects
    this.children = []; // text (strings) and Element objects

    // true when an attribute with the passed name has been added
    this.hasAttribute = function (name) {
        var index = 0;
        while (index < this.attributes.length) {
            if (this.attributes[index].name == name) {
                return true;
            }
            index++;
        }
        return false;
    };

    // the children joined into one string, with no separator
    this.childrenToString = function () {
        var joined = this.children.join("");
        return joined;
    };

    return this;
}
// Attribute object, a name together with its raw value as found in the markup
function Attribute(name, value) {
    var attribute = this;
    attribute.name = name;
    attribute.value = value;
    return attribute;
}
// Tag object, holds the name of a tag and the rules which apply to it.
//
// name          - tag name, lower cased on the way in, "--" marks a comment
// close         - the closing slash when this is a closing tag, otherwise empty
// rawAttributes - the attribute text as found in the markup
// options       - when passed, used to work out (and cache) the allowed attributes
function Tag(name, close, rawAttributes, options) {
    // is tagName one of the items of list
    function isListed(tagName, list) {
        return $.inArray(tagName, list) > -1;
    }

    // the item following tagName in a flat [default, name, value, name, value...]
    // table, or the default (first item) when tagName is not in it
    function valueAfter(tagName, table) {
        return table[$.inArray(tagName, table) + 1];
    }

    // copy of the attributes allowed on tagName, plus those added by options.allowedAttributes
    function allowedAttributesFor(tagName) {
        var allowed = valueAfter(tagName, tagAttributes).slice(0);
        for (var i = 0; i < options.allowedAttributes.length; i++) {
            var rule = options.allowedAttributes[i];
            var attrName = rule[0];
            var appliesToTag = rule.length == 1 || isListed(tagName, rule[1]);
            if (appliesToTag && $.inArray(attrName, allowed) == -1) {
                allowed.push(attrName);
            }
        }
        return allowed;
    }

    this.name = name.toLowerCase();
    this.nameOriginal = this.name;
    this.render = true;

    // (re)calculate everything which depends on the current name
    this.init = function () {
        var current = this.name;

        if (current == "--") {
            this.isComment = true;
            this.isSelfClosing = true;
        } else {
            this.isComment = false;
            this.isSelfClosing = isListed(current, tagSelfClosing);
            this.isNonClosing = isListed(current, tagNonClosing);
            this.isClosing = (close != undefined && close.length > 0);
            this.isInline = isListed(current, tagInline);
            this.disallowNest = isListed(current, tagDisallowNest);
            this.requiredParent = valueAfter(current, tagRequiredParent);
            this.allowEmpty = isListed(current, tagAllowEmpty);
            this.toProtect = isListed(current, tagProtect);
        }

        this.rawAttributes = rawAttributes;
        this.requiredAttributes = valueAfter(current, tagAttributesRequired);

        if (options) {
            // the cache lives on the options, as a flat [name, attributes, name, attributes...] array
            if (!options.tagAttributesCache) {
                options.tagAttributesCache = [];
            }
            if (!isListed(current, options.tagAttributesCache)) {
                var cacheItem = allowedAttributesFor(current);
                options.tagAttributesCache.push(current);
                options.tagAttributesCache.push(cacheItem);
            }
            this.allowedAttributes = valueAfter(current, options.tagAttributesCache);
        }
    };
    this.init();

    // change the name, nameOriginal is left as it was
    this.rename = function (newName) {
        this.name = newName;
        this.init();
    };

    return this;
}
// does the first piece of text in the item (a string, or an element which is
// followed down through its first children) start with white space
function startsWithWhitespace(item) {
    var node = item;
    while (isElement(node) && node.children.length > 0) {
        node = node.children[0];
    }
    if (isText(node)) {
        var cleaned = textClean(node);
        return cleaned.length > 0 && $.htmlClean.isWhitespace(cleaned.charAt(0));
    }
    return false;
}
// does the last piece of text in the item (a string, or an element which is
// followed down through its last children) end with white space
function endsWithWhitespace(item) {
    var node = item;
    while (isElement(node) && node.children.length > 0) {
        node = node.children[node.children.length - 1];
    }
    if (isText(node)) {
        var cleaned = textClean(node);
        return cleaned.length > 0 && $.htmlClean.isWhitespace(cleaned.charAt(cleaned.length - 1));
    }
    return false;
}
// is the item a piece of text, i.e. was it constructed by String
function isText(item) {
    var builtBy = item.constructor;
    return builtBy == String;
}
// text is always inline, for anything else the answer is its tag's isInline
function isInline(item) {
    if (isText(item)) {
        return true;
    }
    return item.tag.isInline;
}
// is the item an Element object, i.e. was it constructed by Element
function isElement(item) {
    var builtBy = item.constructor;
    return builtBy == Element;
}
// Normalise the white space of a piece of text: no-break spaces and line
// feeds become ordinary spaces, then each run of two or more white space
// characters is collapsed into a single space.
//
// text - the string to clean
// Returns the cleaned string.
function textClean(text) {
    return text
        // the no-break space is written as an escape, a literal one cannot be
        // told apart from an ordinary space and is easily lost in transit
        .replace(/\u00a0|\n/g, " ")
        .replace(/\s\s+/g, " ");
}
// trim the white space off both ends of the text, doesn't use regex
$.htmlClean.trim = function (text) {
    var withoutTrailing = $.htmlClean.trimEnd(text);
    return $.htmlClean.trimStart(withoutTrailing);
};
// the text with its leading white space removed
$.htmlClean.trimStart = function (text) {
    var from = $.htmlClean.trimStartIndex(text);
    return text.substring(from);
};
// Index of the first character of the text which is not white space.
//
// text - the string to examine
// Returns text.length when the text is empty or white space only, so that
// trimStart leaves nothing of such a text.
$.htmlClean.trimStartIndex = function (text) {
    var start = 0;
    // every character is examined, the last one included
    while (start < text.length && $.htmlClean.isWhitespace(text.charAt(start))) {
        start++;
    }
    return start;
};
// the text with its trailing white space removed
$.htmlClean.trimEnd = function (text) {
    var upTo = $.htmlClean.trimEndIndex(text);
    return text.substring(0, upTo);
};
// index just past the last character of the text which is not white space,
// 0 when the text is empty or white space only
$.htmlClean.trimEndIndex = function (text) {
    var end = text.length;
    while (end > 0 && $.htmlClean.isWhitespace(text.charAt(end - 1))) {
        end--;
    }
    return end;
};
// checks whether a char is one of the white space chars
$.htmlClean.isWhitespace = function (c) {
    var position = $.inArray(c, whitespace);
    return position != -1;
};
// tags which are treated as inline
var tagInline = [
    "a", "abbr", "acronym", "address",
    "b", "big", "br", "button",
    "caption", "cite", "code",
    "del",
    "em",
    "font",
    "hr",
    "i", "input", "img", "ins",
    "label", "legend",
    "map",
    "q",
    "s", "samp", "select", "option", "param", "small", "span", "strike", "strong", "sub", "sup",
    "tt",
    "u",
    "var"
];

// tags which are not added when directly inside another tag of this list
// (unless they have a required parent)
var tagDisallowNest = [
    "h1", "h2", "h3", "h4", "h5", "h6",
    "p",
    "th", "td",
    "object"
];

// tags which are still rendered when they have no attributes and no content
var tagAllowEmpty = [
    "th",
    "td"
];
// parents required by a tag, as a flat table: the default (none) first, then
// each tag name followed by the array of tag names it may be placed in
var tagRequiredParent = [
    null,
    "li",    ["ul", "ol"],
    "dt",    ["dl"],
    "dd",    ["dl"],
    "td",    ["tr"],
    "th",    ["tr"],
    "tr",    ["table", "thead", "tbody", "tfoot"],
    "thead", ["table"],
    "tbody", ["table"],
    "tfoot", ["table"],
    "param", ["object"]
];
// tags whose content is kept as found, rather than parsed and cleaned
var tagProtect = [
    "script",
    "style",
    "pre",
    "code"
];

// tags which self close e.g. <br />
var tagSelfClosing = [
    "area", "base", "br", "col",
    "command", "embed", "hr", "img",
    "input", "keygen", "link", "meta",
    "param", "source", "track", "wbr"
];

// tags which do not close
var tagNonClosing = [
    "!doctype",
    "?xml"
];
// attributes allowed on tags, as a flat table: the default for all tags not
// mentioned first, then each tag name followed by its array of attribute names
var tagAttributes = [
    ["class"],
    "?xml",       [],
    "!doctype",   [],
    "a",          ["accesskey", "class", "href", "name", "title", "rel", "rev", "type", "tabindex"],
    "abbr",       ["class", "title"],
    "acronym",    ["class", "title"],
    "blockquote", ["cite", "class"],
    "button",     ["class", "disabled", "name", "type", "value"],
    "del",        ["cite", "class", "datetime"],
    "form",       ["accept", "action", "class", "enctype", "method", "name"],
    "input",      [
        "accept", "accesskey", "alt", "checked", "class", "disabled", "ismap", "maxlength",
        "name", "size", "readonly", "src", "tabindex", "type", "usemap", "value"
    ],
    "img",        ["alt", "class", "height", "src", "width"],
    "ins",        ["cite", "class", "datetime"],
    "label",      ["accesskey", "class", "for"],
    "legend",     ["accesskey", "class"],
    "link",       ["href", "rel", "type"],
    "meta",       ["content", "http-equiv", "name", "scheme", "charset"],
    "map",        ["name"],
    "optgroup",   ["class", "disabled", "label"],
    "option",     ["class", "disabled", "label", "selected", "value"],
    "q",          ["class", "cite"],
    "script",     ["src", "type"],
    "select",     ["class", "disabled", "multiple", "name", "size", "tabindex"],
    "style",      ["type"],
    "table",      ["class", "summary"],
    "th",         ["class", "colspan", "rowspan"],
    "td",         ["class", "colspan", "rowspan"],
    "textarea",   ["accesskey", "class", "cols", "disabled", "name", "readonly", "rows", "tabindex"],
    "param",      ["name", "value"],
    "embed",      ["height", "src", "type", "width"]
];

// attributes which are always rendered on a tag, empty if need be; same flat
// layout, the default (none) first
var tagAttributesRequired = [
    [],
    "img", ["alt"]
];
// white space chars, as checked by $.htmlClean.isWhitespace
// the no-break space is written as an escape, so it cannot be mistaken for,
// or silently turned into, a second ordinary space
var whitespace = [" ", "\u00a0", "\t", "\n", "\r", "\f"];
- })(jQuery);
|