refactor($sanitize): new implementation of the html sanitized parser · angular/angular.js@35a2153 · GitHub

  • -This error occurs when the HTML string passed to '$sanitize' can't be parsed by the sanitizer.
  • -The error contains part of the html string that can't be parsed.
  • -The parser is more strict than a typical browser parser, so it's possible that some obscure input would produce this error despite the string being recognized as valid HTML by a browser.

angular.js – AngularJS – HTML enhanced for web apps!

@fenceposterror: @0x6D6172696F Looks like it was done in this commit. So introduced in AngularJS 1.5.0beta1. The mXSS fix was later.

refactor($sanitize): new implementation of the html sanitized parser · angular/angular.js@35a2153 · GitHub

Skip to content

Ignore Learn more Please note that GitHub no longer supports old versions of Firefox.

We recommend upgrading to the latest Safari, Google Chrome, or Firefox.

Features Explore Pricing

This repository

Sign in or Sign up

Watch 4,415

Star 55,121

Fork 27,508

angular / angular.js

Code Issues 608 Pull requests 175 Projects 0 Wiki Pulse Graphs

Permalink Browse files refactor($sanitize): new implementation of the html sanitized parser

This implementation is based on using an inert document parsed by the browser. Closes #11442, Closes #11443, Closes #12524

Loading branch information…

1 parent 1c97a60 commit 35a21532b73d5bd84b4325211c563e6a3e2dde82 mhevery committed with petebacondarwin May 1, 2015

Unified Split

Showing 5 changed files with 134 additions and 560 deletions.

+0 −11 docs/content/error/$sanitize/badparse.ngdoc

+10 −0 docs/content/error/$sanitize/ddns.ngdoc

+0 −309 lib/htmlparser/htmlparser.js

+95 −191 src/ngSanitize/sanitize.js

+29 −49 test/ngSanitize/sanitizeSpec.js

Show comments View

11 docs/content/error/$sanitize/badparse.ngdoc

@@ -1,11 +0,0 @@

-@ngdoc error

-@name $sanitize:badparse

-@fullName Parsing Error while Sanitizing

-@description

-This error occurs when the HTML string passed to '$sanitize' can't be parsed by the sanitizer.

-The error contains part of the html string that can’t be parsed.

-The parser is more strict than a typical browser parser, so it's possible that some obscure input would produce this error despite the string being recognized as valid HTML by a browser.

-If a valid html code results in this error, please file a bug.

Show comments View

10 docs/content/error/$sanitize/ddns.ngdoc

@@ -0,0 +1,10 @@

+@ngdoc error

+@name $sanitize:ddns

+@fullName DOMDocument not supported

+@description

+

+This error occurs when the `$sanitize` sanitizer determines that the `DOMDocument` API is not supported by the current browser.

+

+This API is necessary for safe parsing of HTML strings into DOM trees; without it, the sanitizer can't sanitize the input.

+

+The API is present in all supported browsers, including IE 9.0, so the presence of this error usually indicates that Angular's `$sanitize` is being used on an unsupported platform.

Show comments View

309 lib/htmlparser/htmlparser.js

@@ -1,309 +0,0 @@

-/*

– * HTML Parser By John Resig (ejohn.org)

– * Original code by Erik Arvidsson, Mozilla Public License

– * http://erik.eae.net/simplehtmlparser/simplehtmlparser.js

– *

– * // Use like so:

– * htmlParser(htmlString, {

– * start: function(tag, attrs, unary) {},

– * end: function(tag) {},

– * chars: function(text) {},

– * comment: function(text) {}

– * });

– *

– * // or to get an XML string:

– * HTMLtoXML(htmlString);

– *

– * // or to get an XML DOM Document

– * HTMLtoDOM(htmlString);

– *

– * // or to inject into an existing document/DOM node

– * HTMLtoDOM(htmlString, document);

– * HTMLtoDOM(htmlString, document.body);

– *

– */

-(function(){

– // Regular Expressions for parsing tags and attributes

– var startTag = / ^<(\w +)((?:\s + \w +(?:\s *=\s *(?:(?:"[^"] *")|(?:'[^'] *')| [^>\s] +))?)*)\s *(\/ ?)>/,

– endTag = / ^<\/(\w +)[^>] *>/,

– attr = /(\w +)(?:\s *=\s *(?:(?:”((?:\\ . | [^”])*)”)|(?:'((?:\\ . | [^’])*)’)|([^>\s] +)))? /g;

– // Empty Elements – HTML 4.01

– var empty = makeMap(“area,base,basefont,br,col,frame,hr,img,input,isindex,link,meta,param,embed”);

– // Block Elements – HTML 4.01

– var block = makeMap(“address,applet,blockquote,button,center,dd,del,dir,div,dl,dt,fieldset,form,frameset,hr,iframe,ins,isindex,li,map,menu,noframes,noscript,object,ol,p,pre,script,table,tbody,td,tfoot,th,thead,tr,ul”);

– // Inline Elements – HTML 4.01

– var inline = makeMap(“a,abbr,acronym,applet,b,basefont,bdo,big,br,button,cite,code,del,dfn,em,font,i,iframe,img,input,ins,kbd,label,map,object,q,s,samp,script,select,small,span,strike,strong,sub,sup,textarea,tt,u,var”);

– // Elements that you can, intentionally, leave open

– // (and which close themselves)

– var closeSelf = makeMap(“colgroup,dd,dt,li,options,p,td,tfoot,th,thead,tr”);

– // Attributes that have their values filled in disabled=”disabled”

– var fillAttrs = makeMap(“checked,compact,declare,defer,disabled,ismap,multiple,nohref,noresize,noshade,nowrap,readonly,selected”);

– // Special Elements (can contain anything)

– var special = makeMap(“script,style”);

– var htmlParser = this.htmlParser = function( html, handler ) {

– var index, chars, match, stack = [], last = html;

– stack.last = function(){

– return this[ this.length – 1 ];

– };

– while ( html ) {

– chars = true;

– // Make sure we’re not in a script or style element

– if ( ! stack.last() || !special[ stack.last() ] ) {

– // Comment

– if ( html.indexOf(““);

– if ( index >= 0 ) {

– if ( handler.comment )

– handler.comment( html.substring( 4, index ) );

– html = html.substring( index + 3 );

– chars = false;

– }

– // end tag

– } else if ( html.indexOf(“]*>”), function(all, text){

– text = text.replace(//g, “$1”)

– .replace(/

– if ( handler.chars )

– handler.chars( text );

– return ” “;

– });

– parseEndTag( ” “, stack.last() );

– }

– if ( html == last )

– throw “Parse Error: ” + html;

– last = html;

– }

– // Clean up any remaining tags

– parseEndTag();

– function parseStartTag( tag, tagName, rest, unary ) {

– if ( block[ tagName ] ) {

– while ( stack.last() && inline[ stack.last() ] ) {

– parseEndTag( ” “, stack.last() );

– }

– }

– if ( closeSelf[ tagName ] && stack.last() == tagName ) {

– parseEndTag( ” “, tagName );

– }

– unary = empty[ tagName ] || !!unary;

– if ( !unary )

– stack.push( tagName );

– if ( handler.start ) {

– var attrs = [];

– rest.replace(attr, function(match, name) {

– var value = arguments[2] ? arguments[2] :

– arguments[3] ? arguments[3] :

– arguments[4] ? arguments[4] :

– fillAttrs[name] ? name : ” “;

– attrs.push({

– name: name,

– value: value,

– escaped: value.replace(/(^ | [^ \\])”/g, ‘$1\\\” ‘) //”

– });

– });

– if ( handler.start )

– handler.start( tagName, attrs, unary );

– }

– }

– function parseEndTag( tag, tagName ) {

– // If no tag name is provided, clean shop

– if ( !tagName )

– var pos = 0;

– // Find the closest opened tag of the same type

– else

– for ( var pos = stack.length – 1; pos >= 0; pos– )

– if ( stack[ pos ] == tagName )

– break;

– if ( pos >= 0 ) {

– // Close all the open elements, up the stack

– for ( var i = stack.length – 1; i >= pos; i– )

– if ( handler.end )

– handler.end( stack[ i ] );

– // Remove the open elements from the stack

– stack.length = pos;

– }

– }

– };

– this.HTMLtoXML = function( html ) {

– var results = ” “;

– htmlParser(html, {

– start : function( tag, attrs, unary ) {

– results += “<" + tag; – – for ( var i = 0; i < attrs.length; i++ ) – results += ” ” + attrs[i].name + ‘=”‘ + attrs[i].escaped + ‘”‘; – – results += (unary ? “/” : ” “) + “>”;

– },

– end : function( tag ) {

– results += ““;

– },

– chars : function( text ) {

– results += text;

– },

– comment : function( text ) {

– results += ““;

– }

– });

– return results;

– };

– this.HTMLtoDOM = function( html, doc ) {

– // There can be only one of these elements

– var one = makeMap(“html,head,body,title”);

– // Enforce a structure for the document

– var structure = {

– link: “head”,

– base: “head”

– };

– if ( !doc ) {

– if ( typeof DOMDocument != “undefined” )

– doc = new DOMDocument();

– else if ( typeof document != “undefined” && document.implementation && document.implementation.createDocument )

– doc = document.implementation.createDocument(” “, ” “, null);

– else if ( typeof ActiveX != “undefined” )

– doc = new ActiveXObject(“Msxml.DOMDocument”);

– } else

– doc = doc.ownerDocument ||

– doc.getOwnerDocument && doc.getOwnerDocument() ||

– doc;

– var elems = [],

– documentElement = doc.documentElement ||

– doc.getDocumentElement && doc.getDocumentElement();

– // If we’re dealing with an empty document then we

– // need to pre-populate it with the HTML document structure

– if ( !documentElement && doc.createElement ) (function(){

– var html = doc.createElement(“html”);

– var head = doc.createElement(“head”);

– head.appendChild( doc.createElement(“title”) );

– html.appendChild( head );

– html.appendChild( doc.createElement(“body”) );

– doc.appendChild( html );

– })();

– // Find all the unique elements

– if ( doc.getElementsByTagName )

– for ( var i in one )

– one[ i ] = doc.getElementsByTagName( i )[0];

– // If we’re working with a document, inject contents into

– // the body element

– var curParentNode = one.body;

– htmlParser( html, {

– start : function( tagName, attrs, unary ) {

– // If it’s a pre-built element, then we can ignore

– // its construction

– if ( one[ tagName ] ) {

– curParentNode = one[ tagName ];

– return;

– }

– var elem = doc.createElement( tagName );

– for ( var attr in attrs )

– elem.setAttribute( attrs[ attr ].name, attrs[ attr ].value );

– if ( structure[ tagName ] && typeof one[ structure[ tagName ] ] != “boolean” )

– one[ structure[ tagName ] ].appendChild( elem );

– else if ( curParentNode && curParentNode.appendChild )

– curParentNode.appendChild( elem );

– if ( !unary ) {

– elems.push( elem );

– curParentNode = elem;

– }

– },

– end : function( tag ) {

– elems.length -= 1;

– // Init the new parentNode

– curParentNode = elems[ elems.length – 1 ];

– },

– chars : function( text ) {

– curParentNode.appendChild( doc.createTextNode( text ) );

– },

– comment : function( text ) {

– // create comment node

– }

– });

– return doc;

– };

– function makeMap(str){

– var obj = {}, items = str.split(“,”);

– for ( var i = 0; i < items.length; i++ ) – obj[ items[i] ] = true; – return obj; – } -})(); Oops, something went wrong. Retry 0 comments on commit 35a2153 Please sign in to comment. Contact GitHub API Training Shop Blog About © 2017 GitHub, Inc. Terms Privacy Security Status Help You can’t perform that action at this time. You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session.

refactor($sanitize): new implementation of the html sanitized parser · angular/angular.js@35a2153 · GitHub