javascript - Regex to not match partial sequences, but match full ones -


I have escaped HTML like this:

&lt;img border=&#039;0&#039; /&gt;

I'm trying to match and replace on full escape sequences like &#039; but not on partial ones like 39, since 39 is not actually in the unescaped string. Essentially, each escape sequence should be treated as a single token.

This is a JS regex. Is there a way to exclude matches between & and ; while still accepting sequences that include both of those characters?

desired results:

  • Search &lt;img border=&#039;0&#039; /&gt; for lt: no match.
  • Search &lt;img border=&#039;0&#039; /&gt; for 39: no match.
  • Search &lt;img border=&#039;0&#039; /&gt; for &#039;: match.
  • Search &lt;img border=&#039;0&#039; /&gt; for border=&#039;: match.

current code:

> var str = '&lt;img border=&#039;0&#039; /&gt;'
> str.replace(/(border)/gi, '|$1|')
'&lt;img |border|=&#039;0&#039; /&gt;'  // ok
> str.replace(/(39)/gi, '|$1|')
'&lt;img border=&#0|39|;0&#0|39|; /&gt;'  // not ok

Note: I can't unescape and then re-escape to match. The string has to stay escaped.

The OP wants a JavaScript regex to match and replace a string within escaped HTML while treating escape sequences (e.g. &lt;, &#39;, or &#10;) as single characters, and without unescaping the HTML string during the replacement process.

This means that replacing

  1. "lt" with "[lt]" in "&lt; lt" would result in "&lt; [lt]" (avoid a match within an entity)
  2. "&lt;" with "[&lt;]" in "&lt; lt" would result in "[&lt;] lt" (match a whole entity)
  3. "&l" with "[&l]" in "&lt; &lt" would result in "&lt; [&l]t" (do not match a partial entity)
  4. "t;" with "[t;]" in "&lt; lt;" would result in "&lt; l[t;]" (do not match a partial entity)
  5. "&lt; l" with "[&lt; l]" in "&lt; lt" would result in "[&lt; l]t" (match including an entity)
  6. "lt; &l" with "[lt; &l]" in "&lt; &lt" would result in "&lt; &lt" (do not match a partial entity)
  7. "t; &lt;" with "[t; &lt;]" in "lt; &lt;" would result in "l[t; &lt;]" (match including an entity)
  8. "t; &lt" with "[t; &lt]" in "lt; &lt;" would result in "lt; &lt;" (do not match a partial entity)

With the following regex for capturing escaped sequences (e.g. &lt;, &#39;, or &#10;),

/&[a-z]+;|&#x[a-f\d]+;|&#\d+;/gi 

we may use the following function as a starting point that handles most of the cases above (#1, #2, #4, #5, and #7):

/**
 * Replaces every occurrence of `searchfor` in the escaped-HTML string `str`
 * with `replacement`, treating each HTML character reference (named, hex or
 * decimal) as a single token so that text inside a reference is never matched.
 *
 * The match is case-insensitive. This starting-point version does not guard
 * against a search string that ends in a partial reference (e.g. "&l").
 *
 * @param {string} searchfor - Literal text to look for (not a regex pattern).
 * @param {string} replacement - Text inserted in place of every match.
 * @param {string} str - Escaped HTML to search in.
 * @returns {string} A copy of `str` with the matches replaced.
 */
function searchandreplace(searchfor, replacement, str) {
  return str.replace(
    // The constructor is `RegExp`; `regexp` is an undefined identifier.
    new RegExp(
      prepare(searchfor) +
        "|(&[a-z]+;|&#x[a-f\\d]+;|&#\\d+;)", // consume entities
      "gi"
    ),
    function (m, entity) {
      // When the entity alternative matched, put the entity back unchanged;
      // otherwise the search text matched and is replaced.
      return entity || replacement;
    }
  );
}

/**
 * Escapes `str` for literal use inside a regular expression by prefixing
 * every character that is neither a word character nor whitespace with a
 * backslash.
 *
 * @param {string} str - Literal search text.
 * @returns {string} Regex source that matches `str` literally.
 */
function prepare(str) {
  return str.replace(/[^\w\s]/g, "\\$&"); // escape regex metachars [1]
}

// [1] http://eloquentjavascript.net/09_regexp.html#h_rhu25fogrg

The remaining cases (#3, #6, #8) involve a potential partial escaped sequence at the end of the search string.

A solution is to also check the searchfor string for potential partial escaped sequences at its end and append a corresponding negated lookahead (?!) to prevent matching a valid escaped sequence. A full solution (passing a set of 40 test cases) is shown below, and it should be faster and less complex than an .exec() approach:

/**
 * Replaces every occurrence of `searchfor` in the escaped-HTML string `str`
 * with `replacement`, treating each HTML character reference (named, hex or
 * decimal) as a single token: text inside a reference is never matched, and
 * a search string ending in a partial reference does not match the start of
 * a complete one.
 *
 * The match is case-insensitive.
 *
 * @param {string} searchfor - Literal text to look for (not a regex pattern).
 * @param {string} replacement - Text inserted in place of every match.
 * @param {string} str - Escaped HTML to search in.
 * @returns {string} A copy of `str` with the matches replaced.
 */
function searchandreplace(searchfor, replacement, str) {
  return str.replace(
    // The constructor is `RegExp`; `regexp` is an undefined identifier.
    new RegExp(
      prepare(searchfor) +
        "|(&[a-z]+;|&#x[a-f0-9]+;|&#\\d+;)", // consume whole entities
      "gi"
    ),
    function (m, entity) {
      // When the entity alternative matched, put the entity back unchanged;
      // otherwise the search text matched and is replaced.
      return entity || replacement;
    }
  );
}

/**
 * Builds the regex source for a literal search string. Regex metacharacters
 * are escaped, and when the string ends in something that could be the start
 * of a character reference, a negative lookahead is appended so the match is
 * rejected wherever the following text would complete that reference.
 *
 * @param {string} str - Literal search text.
 * @returns {string} Regex source matching `str` literally, plus any lookahead.
 */
function prepare(str) {
  var add = "";
  if (/&$/.test(str)) {
    // Trailing "&": reject if any kind of reference follows. Hex digits are
    // limited to a-f so this agrees with the entity pattern used above.
    add = "(?!#x[a-f\\d]+;|#\\d+;|[a-z]+;)";
  } else if (/&[a-z]+$/i.test(str)) {
    // Trailing partial named reference, e.g. "&l".
    add = "(?![a-z]*;)";
  } else if (/&#$/.test(str)) {
    // Trailing "&#": could become a hex or a decimal reference.
    add = "(?!x[a-f\\d]+;|\\d+;)";
  } else if (/&#x$/.test(str)) {
    // Trailing "&#x": at least one hex digit must follow.
    add = "(?![a-f\\d]+;)";
  } else if (/&#x[a-f\d]+$/i.test(str)) {
    // Trailing partial hex reference, e.g. "&#x7".
    add = "(?![a-f\\d]*;)";
  } else if (/&#\d+$/.test(str)) {
    // Trailing partial decimal reference, e.g. "&#03"; without this branch
    // the search would match the first part of "&#039;".
    add = "(?!\\d*;)";
  }
  return str.replace(/[^\w\s]/g, "\\$&") + add;
}

// test function

/**
 * Runs one search-and-replace case and logs whether the result equals the
 * expected string.
 */
function test(searchfor, replacement, str, expected) {
  var result = searchandreplace(searchfor, replacement, str);
  console.log(
    searchfor +
      ": " +
      (result === expected ? "passed" : "failed: " + [expected, result])
  );
}

// test cases

// Examples from the question.
test("lt", "[lt]", "&lt;img border=&#039;0&#039; /&gt;", "&lt;img border=&#039;0&#039; /&gt;");
test("39", "[39]", "&lt;img border=&#039;0&#039; /&gt;", "&lt;img border=&#039;0&#039; /&gt;");
test("&#039;", "[&#039;]", "&lt;img border=&#039;0&#039; /&gt;", "&lt;img border=[&#039;]0[&#039;] /&gt;");
test("border=&#039;", "[border=&#039;]", "&lt;img border=&#039;0&#039; /&gt;", "&lt;img [border=&#039;]0&#039; /&gt;");
test("39&", "[39&]", "39&lt;img border=39&#039;&gt&gt&&#039 t; 0&#039;&39; /&gt;", "39&lt;img border=39&#039;&gt&gt&&#039 t; 0&#039;&39; /&gt;");
test("0&#", "[0&#]", "39&lt;img border=39&#039;&gt&gt&&#039 t; 0&#039;&39; /&gt;", "39&lt;img border=39&#039;&gt&gt&&#039 t; 0&#039;&39; /&gt;");

// Named reference surrounded by look-alike fragments.
test("lt", "[]", "&lt&lt;t;t&l", "&[]&lt;t;t&l");
test("&lt;", "[]", "&lt&lt;t;t&l", "&lt[]t;t&l");
test("&l", "[]", "&lt&lt;t;t&l", "[]t&lt;t;t[]");
test("t;", "[]", "&lt&lt;t;t&l", "&lt&lt;[]t&l");
test("t&", "[]", "&lt&lt;t;t&l", "&lt&lt;t;[]l");
test("&lt;t", "[]", "&lt&lt;t;t&l", "&lt[];t&l");
test("t&lt;", "[]", "&lt&lt;t;t&l", "&l[]t;t&l");
test("t;t", "[]", "&lt&lt;t;t&l", "&lt&lt;[]&l");
test("t&l", "[]", "&lt&lt;t;t&l", "&lt&lt;t;[]");

// Decimal reference surrounded by look-alike fragments.
test("39", "[]", "&#039&#039;9;9&#", "&#0[]&#039;9;9&#");
test("&#039;", "[]", "&#039&#039;9;9&#", "&#039[]9;9&#");
test("&", "[]", "&#039&#039;9;9&#", "[]#039&#039;9;9[]#");
test("&#", "[]", "&#039&#039;9;9&#", "[]039&#039;9;9[]");
test("&#03", "[]", "&#039&#039;9;9&#", "[]9&#039;9;9&#");
test("9;", "[]", "&#039&#039;9;9&#", "&#039&#039;[]9&#");
test("9&", "[]", "&#039&#039;9;9&#", "&#039&#039;9;[]#");
test("&#039;9", "[]", "&#039&#039;9;9&#", "&#039[];9&#");
test("9&#039;", "[]", "&#039&#039;9;9&#", "&#03[]9;9&#");
test("9;9", "[]", "&#039&#039;9;9&#", "&#039&#039;[]&#");
test("9&#", "[]", "&#039&#039;9;9&#", "&#039&#039;9;[]");

// Hex reference surrounded by look-alike fragments.
test("x7", "[]", "&#x7f&#x7f;f;f&#x", "&#[]f&#x7f;f;f&#x");
test("&#x7f;", "[]", "&#x7f&#x7f;f;f&#x", "&#x7f[]f;f&#x");
test("&", "[]", "&#x7f&#x7f;f;f&#x", "[]#x7f&#x7f;f;f[]#x");
test("&", "[]", "&#xzz;", "[]#xzz;");
test("&#", "[]", "&#x7f&#x7f;f;f&#x", "[]x7f&#x7f;f;f[]x");
test("&#x", "[]", "&#x7f&#x7f;f;f&#x", "[]7f&#x7f;f;f[]");
test("&#x7", "[]", "&#x7f&#x7f;f;f&#x", "[]f&#x7f;f;f&#x");
test("f;", "[]", "&#x7f&#x7f;f;f&#x", "&#x7f&#x7f;[]f&#x");
test("f&", "[]", "&#x7f&#x7f;f;f&#x", "&#x7f&#x7f;f;[]#x");
test("&#x7f;f", "[]", "&#x7f&#x7f;f;f&#x", "&#x7f[];f&#x");
test("f&#x7f;", "[]", "&#x7f&#x7f;f;f&#x", "&#x7[]f;f&#x");
test("f;f", "[]", "&#x7f&#x7f;f;f&#x", "&#x7f&#x7f;[]&#x");
test("f&#", "[]", "&#x7f&#x7f;f;f&#x", "&#x7f&#x7f;f;[]x");
test("f&#x", "[]", "&#x7f&#x7f;f;f&#x", "&#x7f&#x7f;f;[]");

// Long search string spanning whole entities and ending in a partial one.
test("t; &lt; lt &l", "[]", "&lt; &lt; lt &lt;lt; &lt; lt &lt", "&lt; &lt; lt &lt;l[]t");


Comments

Popular posts from this blog

php - Permission denied. Laravel linux server -

google bigquery - Delta between query execution time and Java query call to finish -

python - Pandas two dataframes multiplication? -