javascript - Regex to not match partial sequences, but match full ones -

I have escaped HTML like this:

&lt;img border=&#039;0&#039; /&gt;

I'm trying to match and replace full escape sequences like &#039; but not partial ones, like 39, since 39 is not actually in the unescaped string. Essentially, each escape sequence should be treated as a single token.

This is a JS regex. Is there a way to exclude matches between & and ; while still accepting sequences that include both of those characters?

desired results:

Search <img border='0' /> for lt: no match.
Search <img border='0' /> for 39: no match.
Search <img border='0' /> for ': match.
Search <img border='0' /> for border=': match.

current code:

> var str = '&lt;img border=&#039;0&#039; /&gt;'
> str.replace(/(border)/gi, '|$1|')
'&lt;img |border|=&#039;0&#039; /&gt;'  // ok
> str.replace(/(39)/gi, '|$1|')
'&lt;img border=&#0|39|;0&#0|39|; /&gt;'  // not ok

Note: I can't unescape and then re-escape to match. The string has to stay escaped.

The OP wants a JavaScript regex to match and replace a string within escaped HTML while treating escape sequences (e.g. &lt;, &#039;, or &#x27;) as single characters, and without unescaping the HTML string during the replacement process.

This means that replacing:

1. "lt" with "[lt]" in "&lt; lt" would result in "&lt; [lt]" (avoid match within entity)
2. "&lt;" with "[&lt;]" in "&lt; lt" would result in "[&lt;] lt" (match entity)
3. "&l" with "[&l]" in "&lt; &lt" would result in "&lt; [&l]t" (not match partial entity)
4. "t;" with "[t;]" in "&lt; lt;" would result in "&lt; l[t;]" (not match partial entity)
5. "&lt; l" with "[&lt; l]" in "&lt; lt" would result in "[&lt; l]t" (match including entity)
6. "lt; &l" with "[lt; &l]" in "&lt; &lt" would result in "&lt; &lt" (not match partial entity)
7. "t; &lt;" with "[t; &lt;]" in "lt; &lt;" would result in "l[t; &lt;]" (match including entity)
8. "t; &lt" with "[t; &lt]" in "lt; &lt;" would result in "lt; &lt;" (not match partial entity)

With the following regex for capturing escaped sequences (e.g. &lt;, &#039;, or &#x27;),

/&[a-z]+;|&#x[a-f\d]+;|&#\d+;/gi

we may use the following function as a starting point that handles most of the cases above (#1, #2, #4, #5, and #7):

/**
 * Replaces every occurrence of `searchfor` in the escaped-HTML string `str`
 * with `replacement`, without matching inside a complete escape sequence
 * (named, decimal or hexadecimal character reference).
 *
 * The regex alternates between the literal search text and a capturing group
 * that consumes one whole entity. When the entity group matches, the entity is
 * returned unchanged, so text inside it can never be replaced on its own.
 * Matching is case-insensitive. `replacement` is inserted literally (no `$1`
 * style expansion, because a replacer function is used).
 *
 * Known limitation of this starting-point version: a search string that ends
 * in a partial entity (e.g. "&l") can still match the beginning of an entity.
 *
 * @param {string} searchfor - Literal text to look for.
 * @param {string} replacement - Literal text to insert for each match.
 * @param {string} str - Escaped HTML to search in.
 * @returns {string} A copy of `str` with the replacements applied.
 */
function searchandreplace(searchfor, replacement, str) {
  return str.replace(
    new RegExp(
      prepare(searchfor) +
        "|(&[a-z]+;|&#x[a-f\\d]+;|&#\\d+;)", // consume entities
      "gi"
    ),
    function (m, entity) {
      // `entity` is only defined when the second alternative matched.
      return entity || replacement;
    }
  );
}

/**
 * Escapes `str` so it can be embedded in a regex source as literal text.
 *
 * @param {string} str - Raw search text.
 * @returns {string} `str` with every non-word, non-whitespace character
 *   prefixed by a backslash.
 */
function prepare(str) {
  return str.replace(/[^\w\s]/g, "\\$&"); // escape regex metachars [1]
}

// [1] http://eloquentjavascript.net/09_regexp.html#h_rhu25fogrg

The remaining cases (#3, #6, #8) involve a potential partial escaped sequence at the end of the search string.

A solution is to check the searchfor string for potential partial escaped sequences at its end, and append the corresponding negated lookahead (?!) to prevent matching a valid escaped sequence. A full solution (passing a set of 40 test cases) is shown below, and should be faster and less complex than an .exec() approach:

/**
 * Replaces every occurrence of `searchfor` in the escaped-HTML string `str`
 * with `replacement`, treating each complete escape sequence (named, decimal
 * or hexadecimal character reference) as a single indivisible token.
 *
 * The regex alternates between the prepared search pattern and a capturing
 * group that consumes one whole entity. When the entity group matches, the
 * entity is returned unchanged, so text inside it is never replaced on its
 * own. Matching is case-insensitive, and `replacement` is inserted literally.
 *
 * @param {string} searchfor - Literal text to look for.
 * @param {string} replacement - Literal text to insert for each match.
 * @param {string} str - Escaped HTML to search in.
 * @returns {string} A copy of `str` with the replacements applied.
 */
function searchandreplace(searchfor, replacement, str) {
  return str.replace(
    new RegExp(
      prepare(searchfor) +
        "|(&[a-z]+;|&#x[a-f0-9]+;|&#\\d+;)", // consume entities
      "gi"
    ),
    function (m, entity) {
      // `entity` is only defined when the second alternative matched.
      return entity || replacement;
    }
  );
}

/**
 * Turns the raw search text into a regex source fragment.
 *
 * All non-word, non-whitespace characters are backslash-escaped. If the text
 * ends in something that could be the start of an entity ("&", "&lt", "&#",
 * "&#03", "&#x", "&#x7"), a negative lookahead is appended so the match is
 * rejected when the following characters would complete a valid entity.
 *
 * @param {string} str - Raw search text.
 * @returns {string} Regex source matching `str` literally, plus the optional
 *   lookahead guard.
 */
function prepare(str) {
  var add = "";
  if (/&$/.test(str)) {
    // Hex digits are [a-f\d], matching the entity pattern used by the caller.
    add = "(?!#x[a-f\\d]+;|#\\d+;|[a-z]+;)";
  } else if (/&[a-z]+$/i.test(str)) {
    add = "(?![a-z]*;)";
  } else if (/&#$/.test(str)) {
    add = "(?!x[a-f\\d]+;|\\d+;)";
  } else if (/&#\d+$/.test(str)) {
    // Partial decimal reference, e.g. "&#03" must not match inside "&#039;".
    add = "(?!\\d*;)";
  } else if (/&#x$/i.test(str)) {
    // Case-insensitive because the final regex is built with the "i" flag.
    add = "(?![a-f\\d]+;)";
  } else if (/&#x[a-f\d]+$/i.test(str)) {
    add = "(?![a-f\\d]*;)";
  }
  return str.replace(/[^\w\s]/g, "\\$&") + add;
}

// test function

/**
 * Runs one search-and-replace case and logs "passed" or the expected/actual
 * pair to the console.
 *
 * @param {string} searchfor - Text to search for.
 * @param {string} replacement - Replacement text.
 * @param {string} str - Escaped HTML input.
 * @param {string} expected - Expected output of searchandreplace.
 */
function test(searchfor, replacement, str, expected) {
  var result = searchandreplace(searchfor, replacement, str);
  console.log(
    searchfor +
      ": " +
      (result === expected ? "passed" : "failed: " + [expected, result])
  );
}

// test cases

test("lt", "[lt]", "&lt;img border=&#039;0&#039; /&gt;", "&lt;img border=&#039;0&#039; /&gt;");
test("39", "[39]", "&lt;img border=&#039;0&#039; /&gt;", "&lt;img border=&#039;0&#039; /&gt;");
test("&#039;", "[&#039;]", "&lt;img border=&#039;0&#039; /&gt;", "&lt;img border=[&#039;]0[&#039;] /&gt;");
test("border=&#039;", "[border=&#039;]", "&lt;img border=&#039;0&#039; /&gt;", "&lt;img [border=&#039;]0&#039; /&gt;");
test("39&", "[39&]", "39&lt;img border=39&#039;&gt&gt&&#039 t; 0&#039;&39; /&gt;", "39&lt;img border=39&#039;&gt&gt&&#039 t; 0&#039;&39; /&gt;");
test("0&#", "[0&#]", "39&lt;img border=39&#039;&gt&gt&&#039 t; 0&#039;&39; /&gt;", "39&lt;img border=39&#039;&gt&gt&&#039 t; 0&#039;&39; /&gt;");
test("lt", "[]", "&lt&lt;t;t&l", "&[]&lt;t;t&l");
test("&lt;", "[]", "&lt&lt;t;t&l", "&lt[]t;t&l");
test("&l", "[]", "&lt&lt;t;t&l", "[]t&lt;t;t[]");
test("t;", "[]", "&lt&lt;t;t&l", "&lt&lt;[]t&l");
test("t&", "[]", "&lt&lt;t;t&l", "&lt&lt;t;[]l");
test("&lt;t", "[]", "&lt&lt;t;t&l", "&lt[];t&l");
test("t&lt;", "[]", "&lt&lt;t;t&l", "&l[]t;t&l");
test("t;t", "[]", "&lt&lt;t;t&l", "&lt&lt;[]&l");
test("t&l", "[]", "&lt&lt;t;t&l", "&lt&lt;t;[]");
test("39", "[]", "&#039&#039;9;9&#", "&#0[]&#039;9;9&#");
test("&#039;", "[]", "&#039&#039;9;9&#", "&#039[]9;9&#");
test("&", "[]", "&#039&#039;9;9&#", "[]#039&#039;9;9[]#");
test("&#", "[]", "&#039&#039;9;9&#", "[]039&#039;9;9[]");
test("9;", "[]", "&#039&#039;9;9&#", "&#039&#039;[]9&#");
test("9&", "[]", "&#039&#039;9;9&#", "&#039&#039;9;[]#");
test("&#039;9", "[]", "&#039&#039;9;9&#", "&#039[];9&#");
test("9&#039;", "[]", "&#039&#039;9;9&#", "&#03[]9;9&#");
test("9;9", "[]", "&#039&#039;9;9&#", "&#039&#039;[]&#");
test("9&#", "[]", "&#039&#039;9;9&#", "&#039&#039;9;[]");
test("x7", "[]", "&#x7f&#x7f;f;f&#x", "&#[]f&#x7f;f;f&#x");
test("&#x7f;", "[]", "&#x7f&#x7f;f;f&#x", "&#x7f[]f;f&#x");
test("&", "[]", "&#x7f&#x7f;f;f&#x", "[]#x7f&#x7f;f;f[]#x");
test("&#", "[]", "&#x7f&#x7f;f;f&#x", "[]x7f&#x7f;f;f[]x");
test("&#x", "[]", "&#x7f&#x7f;f;f&#x", "[]7f&#x7f;f;f[]");
test("&#x7", "[]", "&#x7f&#x7f;f;f&#x", "[]f&#x7f;f;f&#x");
test("f;", "[]", "&#x7f&#x7f;f;f&#x", "&#x7f&#x7f;[]f&#x");
test("f&", "[]", "&#x7f&#x7f;f;f&#x", "&#x7f&#x7f;f;[]#x");
test("&#x7f;f", "[]", "&#x7f&#x7f;f;f&#x", "&#x7f[];f&#x");
test("f&#x7f;", "[]", "&#x7f&#x7f;f;f&#x", "&#x7[]f;f&#x");
test("f;f", "[]", "&#x7f&#x7f;f;f&#x", "&#x7f&#x7f;[]&#x");
test("f&#", "[]", "&#x7f&#x7f;f;f&#x", "&#x7f&#x7f;f;[]x");
test("f&#x", "[]", "&#x7f&#x7f;f;f&#x", "&#x7f&#x7f;f;[]");
test("t; &lt; lt &l", "[]", "&lt; &lt; lt &lt;lt; &lt; lt &lt", "&lt; &lt; lt &lt;l[]t");

Search This Blog

New Generation Education

javascript - Regex to not match partial sequences, but match full ones -

Comments

Post a Comment

Popular posts from this blog

cookies - Yii2 Advanced - Share session between frontend and mainsite (duplicate of frontend for www) -

angular - password and confirm password field validation angular2 reactive forms -

javascript - Angular2 intelliJ config error.. Cannot find module '@angular/core' -