001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.text; 018 019import org.apache.commons.lang3.StringUtils; 020import org.apache.commons.text.translate.AggregateTranslator; 021import org.apache.commons.text.translate.CharSequenceTranslator; 022import org.apache.commons.text.translate.CsvTranslators; 023import org.apache.commons.text.translate.EntityArrays; 024import org.apache.commons.text.translate.JavaUnicodeEscaper; 025import org.apache.commons.text.translate.LookupTranslator; 026import org.apache.commons.text.translate.NumericEntityEscaper; 027import org.apache.commons.text.translate.NumericEntityUnescaper; 028import org.apache.commons.text.translate.OctalUnescaper; 029import org.apache.commons.text.translate.UnicodeUnescaper; 030import org.apache.commons.text.translate.UnicodeUnpairedSurrogateRemover; 031 032import java.io.IOException; 033import java.io.Writer; 034import java.util.Collections; 035import java.util.HashMap; 036import java.util.Map; 037 038/** 039 * <p>Escapes and unescapes {@code String}s for 040 * Java, Java Script, HTML and XML.</p> 041 * 042 * <p>#ThreadSafe#</p> 043 * 044 * 045 * <p> 046 * This code has been adapted from Apache Commons Lang 3.5. 047 * </p> 048 * 049 * @since 1.0 050 */ 051public class StringEscapeUtils { 052 053 /* ESCAPE TRANSLATORS */ 054 055 /** 056 * Translator object for escaping Java. 057 * 058 * While {@link #escapeJava(String)} is the expected method of use, this 059 * object allows the Java escaping functionality to be used 060 * as the foundation for a custom translator. 061 */ 062 public static final CharSequenceTranslator ESCAPE_JAVA; 063 static { 064 final Map<CharSequence, CharSequence> escapeJavaMap = new HashMap<>(); 065 escapeJavaMap.put("\"", "\\\""); 066 escapeJavaMap.put("\\", "\\\\"); 067 ESCAPE_JAVA = new AggregateTranslator( 068 new LookupTranslator(Collections.unmodifiableMap(escapeJavaMap)), 069 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE), 070 JavaUnicodeEscaper.outsideOf(32, 0x7f) 071 ); 072 } 073 074 /** 075 * Translator object for escaping EcmaScript/JavaScript. 076 * 077 * While {@link #escapeEcmaScript(String)} is the expected method of use, this 078 * object allows the EcmaScript escaping functionality to be used 079 * as the foundation for a custom translator. 080 */ 081 public static final CharSequenceTranslator ESCAPE_ECMASCRIPT; 082 static { 083 final Map<CharSequence, CharSequence> escapeEcmaScriptMap = new HashMap<>(); 084 escapeEcmaScriptMap.put("'", "\\'"); 085 escapeEcmaScriptMap.put("\"", "\\\""); 086 escapeEcmaScriptMap.put("\\", "\\\\"); 087 escapeEcmaScriptMap.put("/", "\\/"); 088 ESCAPE_ECMASCRIPT = new AggregateTranslator( 089 new LookupTranslator(Collections.unmodifiableMap(escapeEcmaScriptMap)), 090 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE), 091 JavaUnicodeEscaper.outsideOf(32, 0x7f) 092 ); 093 } 094 095 /** 096 * Translator object for escaping Json. 097 * 098 * While {@link #escapeJson(String)} is the expected method of use, this 099 * object allows the Json escaping functionality to be used 100 * as the foundation for a custom translator. 101 */ 102 public static final CharSequenceTranslator ESCAPE_JSON; 103 static { 104 final Map<CharSequence, CharSequence> escapeJsonMap = new HashMap<>(); 105 escapeJsonMap.put("\"", "\\\""); 106 escapeJsonMap.put("\\", "\\\\"); 107 escapeJsonMap.put("/", "\\/"); 108 ESCAPE_JSON = new AggregateTranslator( 109 new LookupTranslator(Collections.unmodifiableMap(escapeJsonMap)), 110 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE), 111 JavaUnicodeEscaper.outsideOf(32, 0x7f) 112 ); 113 } 114 115 /** 116 * Translator object for escaping XML 1.0. 117 * 118 * While {@link #escapeXml10(String)} is the expected method of use, this 119 * object allows the XML escaping functionality to be used 120 * as the foundation for a custom translator. 121 */ 122 public static final CharSequenceTranslator ESCAPE_XML10; 123 static { 124 final Map<CharSequence, CharSequence> escapeXml10Map = new HashMap<>(); 125 escapeXml10Map.put("\u0000", StringUtils.EMPTY); 126 escapeXml10Map.put("\u0001", StringUtils.EMPTY); 127 escapeXml10Map.put("\u0002", StringUtils.EMPTY); 128 escapeXml10Map.put("\u0003", StringUtils.EMPTY); 129 escapeXml10Map.put("\u0004", StringUtils.EMPTY); 130 escapeXml10Map.put("\u0005", StringUtils.EMPTY); 131 escapeXml10Map.put("\u0006", StringUtils.EMPTY); 132 escapeXml10Map.put("\u0007", StringUtils.EMPTY); 133 escapeXml10Map.put("\u0008", StringUtils.EMPTY); 134 escapeXml10Map.put("\u000b", StringUtils.EMPTY); 135 escapeXml10Map.put("\u000c", StringUtils.EMPTY); 136 escapeXml10Map.put("\u000e", StringUtils.EMPTY); 137 escapeXml10Map.put("\u000f", StringUtils.EMPTY); 138 escapeXml10Map.put("\u0010", StringUtils.EMPTY); 139 escapeXml10Map.put("\u0011", StringUtils.EMPTY); 140 escapeXml10Map.put("\u0012", StringUtils.EMPTY); 141 escapeXml10Map.put("\u0013", StringUtils.EMPTY); 142 escapeXml10Map.put("\u0014", StringUtils.EMPTY); 143 escapeXml10Map.put("\u0015", StringUtils.EMPTY); 144 escapeXml10Map.put("\u0016", StringUtils.EMPTY); 145 escapeXml10Map.put("\u0017", StringUtils.EMPTY); 146 escapeXml10Map.put("\u0018", StringUtils.EMPTY); 147 escapeXml10Map.put("\u0019", StringUtils.EMPTY); 148 escapeXml10Map.put("\u001a", StringUtils.EMPTY); 149 escapeXml10Map.put("\u001b", StringUtils.EMPTY); 150 escapeXml10Map.put("\u001c", StringUtils.EMPTY); 151 escapeXml10Map.put("\u001d", StringUtils.EMPTY); 152 escapeXml10Map.put("\u001e", StringUtils.EMPTY); 153 escapeXml10Map.put("\u001f", StringUtils.EMPTY); 154 escapeXml10Map.put("\ufffe", StringUtils.EMPTY); 155 escapeXml10Map.put("\uffff", StringUtils.EMPTY); 156 ESCAPE_XML10 = new AggregateTranslator( 157 new LookupTranslator(EntityArrays.BASIC_ESCAPE), 158 new LookupTranslator(EntityArrays.APOS_ESCAPE), 159 new LookupTranslator(Collections.unmodifiableMap(escapeXml10Map)), 160 NumericEntityEscaper.between(0x7f, 0x84), 161 NumericEntityEscaper.between(0x86, 0x9f), 162 new UnicodeUnpairedSurrogateRemover() 163 ); 164 } 165 166 /** 167 * Translator object for escaping XML 1.1. 168 * 169 * While {@link #escapeXml11(String)} is the expected method of use, this 170 * object allows the XML escaping functionality to be used 171 * as the foundation for a custom translator. 172 */ 173 public static final CharSequenceTranslator ESCAPE_XML11; 174 static { 175 final Map<CharSequence, CharSequence> escapeXml11Map = new HashMap<>(); 176 escapeXml11Map.put("\u0000", StringUtils.EMPTY); 177 escapeXml11Map.put("\u000b", ""); 178 escapeXml11Map.put("\u000c", ""); 179 escapeXml11Map.put("\ufffe", StringUtils.EMPTY); 180 escapeXml11Map.put("\uffff", StringUtils.EMPTY); 181 ESCAPE_XML11 = new AggregateTranslator( 182 new LookupTranslator(EntityArrays.BASIC_ESCAPE), 183 new LookupTranslator(EntityArrays.APOS_ESCAPE), 184 new LookupTranslator(Collections.unmodifiableMap(escapeXml11Map)), 185 NumericEntityEscaper.between(0x1, 0x8), 186 NumericEntityEscaper.between(0xe, 0x1f), 187 NumericEntityEscaper.between(0x7f, 0x84), 188 NumericEntityEscaper.between(0x86, 0x9f), 189 new UnicodeUnpairedSurrogateRemover() 190 ); 191 } 192 193 /** 194 * Translator object for escaping HTML version 3.0. 195 * 196 * While {@link #escapeHtml3(String)} is the expected method of use, this 197 * object allows the HTML escaping functionality to be used 198 * as the foundation for a custom translator. 199 */ 200 public static final CharSequenceTranslator ESCAPE_HTML3 = 201 new AggregateTranslator( 202 new LookupTranslator(EntityArrays.BASIC_ESCAPE), 203 new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE) 204 ); 205 206 /** 207 * Translator object for escaping HTML version 4.0. 208 * 209 * While {@link #escapeHtml4(String)} is the expected method of use, this 210 * object allows the HTML escaping functionality to be used 211 * as the foundation for a custom translator. 212 */ 213 public static final CharSequenceTranslator ESCAPE_HTML4 = 214 new AggregateTranslator( 215 new LookupTranslator(EntityArrays.BASIC_ESCAPE), 216 new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE), 217 new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE) 218 ); 219 220 /** 221 * Translator object for escaping individual Comma Separated Values. 222 * 223 * While {@link #escapeCsv(String)} is the expected method of use, this 224 * object allows the CSV escaping functionality to be used 225 * as the foundation for a custom translator. 226 */ 227 public static final CharSequenceTranslator ESCAPE_CSV = new CsvTranslators.CsvEscaper(); 228 229 /** 230 * Translator object for escaping Shell command language. 231 * 232 * @see <a href="http://pubs.opengroup.org/onlinepubs/7908799/xcu/chap2.html">Shell Command Language</a> 233 */ 234 public static final CharSequenceTranslator ESCAPE_XSI; 235 static { 236 final Map<CharSequence, CharSequence> escapeXsiMap = new HashMap<>(); 237 escapeXsiMap.put("|", "\\|"); 238 escapeXsiMap.put("&", "\\&"); 239 escapeXsiMap.put(";", "\\;"); 240 escapeXsiMap.put("<", "\\<"); 241 escapeXsiMap.put(">", "\\>"); 242 escapeXsiMap.put("(", "\\("); 243 escapeXsiMap.put(")", "\\)"); 244 escapeXsiMap.put("$", "\\$"); 245 escapeXsiMap.put("`", "\\`"); 246 escapeXsiMap.put("\\", "\\\\"); 247 escapeXsiMap.put("\"", "\\\""); 248 escapeXsiMap.put("'", "\\'"); 249 escapeXsiMap.put(" ", "\\ "); 250 escapeXsiMap.put("\t", "\\\t"); 251 escapeXsiMap.put("\r\n", ""); 252 escapeXsiMap.put("\n", ""); 253 escapeXsiMap.put("*", "\\*"); 254 escapeXsiMap.put("?", "\\?"); 255 escapeXsiMap.put("[", "\\["); 256 escapeXsiMap.put("#", "\\#"); 257 escapeXsiMap.put("~", "\\~"); 258 escapeXsiMap.put("=", "\\="); 259 escapeXsiMap.put("%", "\\%"); 260 ESCAPE_XSI = new LookupTranslator( 261 Collections.unmodifiableMap(escapeXsiMap) 262 ); 263 } 264 265 /* UNESCAPE TRANSLATORS */ 266 267 /** 268 * Translator object for unescaping escaped Java. 269 * 270 * While {@link #unescapeJava(String)} is the expected method of use, this 271 * object allows the Java unescaping functionality to be used 272 * as the foundation for a custom translator. 273 */ 274 public static final CharSequenceTranslator UNESCAPE_JAVA; 275 static { 276 final Map<CharSequence, CharSequence> unescapeJavaMap = new HashMap<>(); 277 unescapeJavaMap.put("\\\\", "\\"); 278 unescapeJavaMap.put("\\\"", "\""); 279 unescapeJavaMap.put("\\'", "'"); 280 unescapeJavaMap.put("\\", ""); 281 UNESCAPE_JAVA = new AggregateTranslator( 282 new OctalUnescaper(), // .between('\1', '\377'), 283 new UnicodeUnescaper(), 284 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE), 285 new LookupTranslator(Collections.unmodifiableMap(unescapeJavaMap)) 286 ); 287 } 288 289 /** 290 * Translator object for unescaping escaped EcmaScript. 291 * 292 * While {@link #unescapeEcmaScript(String)} is the expected method of use, this 293 * object allows the EcmaScript unescaping functionality to be used 294 * as the foundation for a custom translator. 295 */ 296 public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA; 297 298 /** 299 * Translator object for unescaping escaped Json. 300 * 301 * While {@link #unescapeJson(String)} is the expected method of use, this 302 * object allows the Json unescaping functionality to be used 303 * as the foundation for a custom translator. 304 */ 305 public static final CharSequenceTranslator UNESCAPE_JSON = UNESCAPE_JAVA; 306 307 /** 308 * Translator object for unescaping escaped HTML 3.0. 309 * 310 * While {@link #unescapeHtml3(String)} is the expected method of use, this 311 * object allows the HTML unescaping functionality to be used 312 * as the foundation for a custom translator. 313 */ 314 public static final CharSequenceTranslator UNESCAPE_HTML3 = 315 new AggregateTranslator( 316 new LookupTranslator(EntityArrays.BASIC_UNESCAPE), 317 new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE), 318 new NumericEntityUnescaper() 319 ); 320 321 /** 322 * Translator object for unescaping escaped HTML 4.0. 323 * 324 * While {@link #unescapeHtml4(String)} is the expected method of use, this 325 * object allows the HTML unescaping functionality to be used 326 * as the foundation for a custom translator. 327 */ 328 public static final CharSequenceTranslator UNESCAPE_HTML4 = 329 new AggregateTranslator( 330 new LookupTranslator(EntityArrays.BASIC_UNESCAPE), 331 new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE), 332 new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE), 333 new NumericEntityUnescaper() 334 ); 335 336 /** 337 * Translator object for unescaping escaped XML. 338 * 339 * While {@link #unescapeXml(String)} is the expected method of use, this 340 * object allows the XML unescaping functionality to be used 341 * as the foundation for a custom translator. 342 */ 343 public static final CharSequenceTranslator UNESCAPE_XML = 344 new AggregateTranslator( 345 new LookupTranslator(EntityArrays.BASIC_UNESCAPE), 346 new LookupTranslator(EntityArrays.APOS_UNESCAPE), 347 new NumericEntityUnescaper() 348 ); 349 350 /** 351 * Translator object for unescaping escaped Comma Separated Value entries. 352 * 353 * While {@link #unescapeCsv(String)} is the expected method of use, this 354 * object allows the CSV unescaping functionality to be used 355 * as the foundation for a custom translator. 356 */ 357 public static final CharSequenceTranslator UNESCAPE_CSV = new CsvTranslators.CsvUnescaper(); 358 359 /** 360 * Translator object for unescaping escaped XSI Value entries. 361 * 362 * While {@link #unescapeXSI(String)} is the expected method of use, this 363 * object allows the XSI unescaping functionality to be used 364 * as the foundation for a custom translator. 365 */ 366 public static final CharSequenceTranslator UNESCAPE_XSI = new XsiUnescaper(); 367 368 /** 369 * Translator object for unescaping backslash escaped entries. 370 */ 371 static class XsiUnescaper extends CharSequenceTranslator { 372 373 /** 374 * Escaped backslash constant. 375 */ 376 private static final char BACKSLASH = '\\'; 377 378 @Override 379 public int translate(final CharSequence input, final int index, final Writer out) throws IOException { 380 381 if (index != 0) { 382 throw new IllegalStateException("XsiUnescaper should never reach the [1] index"); 383 } 384 385 final String s = input.toString(); 386 387 int segmentStart = 0; 388 int searchOffset = 0; 389 while (true) { 390 final int pos = s.indexOf(BACKSLASH, searchOffset); 391 if (pos == -1) { 392 if (segmentStart < s.length()) { 393 out.write(s.substring(segmentStart)); 394 } 395 break; 396 } 397 if (pos > segmentStart) { 398 out.write(s.substring(segmentStart, pos)); 399 } 400 segmentStart = pos + 1; 401 searchOffset = pos + 2; 402 } 403 404 return Character.codePointCount(input, 0, input.length()); 405 } 406 } 407 408 /* Helper functions */ 409 410 /** 411 * <p>{@code StringEscapeUtils} instances should NOT be constructed in 412 * standard programming.</p> 413 * 414 * <p>Instead, the class should be used as:</p> 415 * <pre>StringEscapeUtils.escapeJava("foo");</pre> 416 * 417 * <p>This constructor is public to permit tools that require a JavaBean 418 * instance to operate.</p> 419 */ 420 public StringEscapeUtils() { 421 super(); 422 } 423 424 /** 425 * <p>Convenience wrapper for {@link java.lang.StringBuilder} providing escape methods.</p> 426 * 427 * <p>Example:</p> 428 * <pre> 429 * new Builder(ESCAPE_HTML4) 430 * .append("<p>") 431 * .escape("This is paragraph 1 and special chars like & get escaped.") 432 * .append("</p><p>") 433 * .escape("This is paragraph 2 & more...") 434 * .append("</p>") 435 * .toString() 436 * </pre> 437 * 438 */ 439 public static final class Builder { 440 441 /** 442 * StringBuilder to be used in the Builder class. 443 */ 444 private final StringBuilder sb; 445 446 /** 447 * CharSequenceTranslator to be used in the Builder class. 448 */ 449 private final CharSequenceTranslator translator; 450 451 /** 452 * Builder constructor. 453 * 454 * @param translator a CharSequenceTranslator. 455 */ 456 private Builder(final CharSequenceTranslator translator) { 457 this.sb = new StringBuilder(); 458 this.translator = translator; 459 } 460 461 /** 462 * <p>Escape {@code input} according to the given {@link CharSequenceTranslator}.</p> 463 * 464 * @param input the String to escape 465 * @return {@code this}, to enable chaining 466 */ 467 public Builder escape(final String input) { 468 sb.append(translator.translate(input)); 469 return this; 470 } 471 472 /** 473 * Literal append, no escaping being done. 474 * 475 * @param input the String to append 476 * @return {@code this}, to enable chaining 477 */ 478 public Builder append(final String input) { 479 sb.append(input); 480 return this; 481 } 482 483 /** 484 * <p>Return the escaped string.</p> 485 * 486 * @return the escaped string 487 */ 488 @Override 489 public String toString() { 490 return sb.toString(); 491 } 492 } 493 494 /** 495 * Get a {@link Builder}. 496 * @param translator the text translator 497 * @return {@link Builder} 498 */ 499 public static StringEscapeUtils.Builder builder(final CharSequenceTranslator translator) { 500 return new Builder(translator); 501 } 502 503 // Java and JavaScript 504 //-------------------------------------------------------------------------- 505 /** 506 * <p>Escapes the characters in a {@code String} using Java String rules.</p> 507 * 508 * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p> 509 * 510 * <p>So a tab becomes the characters {@code '\\'} and 511 * {@code 't'}.</p> 512 * 513 * <p>The only difference between Java strings and JavaScript strings 514 * is that in JavaScript, a single quote and forward-slash (/) are escaped.</p> 515 * 516 * <p>Example:</p> 517 * <pre> 518 * input string: He didn't say, "Stop!" 519 * output string: He didn't say, \"Stop!\" 520 * </pre> 521 * 522 * @param input String to escape values in, may be null 523 * @return String with escaped values, {@code null} if null string input 524 */ 525 public static final String escapeJava(final String input) { 526 return ESCAPE_JAVA.translate(input); 527 } 528 529 /** 530 * <p>Escapes the characters in a {@code String} using EcmaScript String rules.</p> 531 * <p>Escapes any values it finds into their EcmaScript String form. 532 * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p> 533 * 534 * <p>So a tab becomes the characters {@code '\\'} and 535 * {@code 't'}.</p> 536 * 537 * <p>The only difference between Java strings and EcmaScript strings 538 * is that in EcmaScript, a single quote and forward-slash (/) are escaped.</p> 539 * 540 * <p>Note that EcmaScript is best known by the JavaScript and ActionScript dialects. </p> 541 * 542 * <p>Example:</p> 543 * <pre> 544 * input string: He didn't say, "Stop!" 545 * output string: He didn\'t say, \"Stop!\" 546 * </pre> 547 * 548 * <b>Security Note.</b> We only provide backslash escaping in this method. For example, {@code '\"'} has the output 549 * {@code '\\\"'} which could result in potential issues in the case where the string being escaped is being used 550 * in an HTML tag like {@code <select onmouseover="..." />}. If you wish to have more rigorous string escaping, you 551 * may consider the 552 * <a href="https://www.owasp.org/index.php/Category:OWASP_Enterprise_Security_API_JAVA">ESAPI Libraries</a>. 553 * Further, you can view the <a href="https://github.com/esapi">ESAPI GitHub Org</a>. 554 * 555 * @param input String to escape values in, may be null 556 * @return String with escaped values, {@code null} if null string input 557 */ 558 public static final String escapeEcmaScript(final String input) { 559 return ESCAPE_ECMASCRIPT.translate(input); 560 } 561 562 /** 563 * <p>Escapes the characters in a {@code String} using Json String rules.</p> 564 * <p>Escapes any values it finds into their Json String form. 565 * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p> 566 * 567 * <p>So a tab becomes the characters {@code '\\'} and 568 * {@code 't'}.</p> 569 * 570 * <p>The only difference between Java strings and Json strings 571 * is that in Json, forward-slash (/) is escaped.</p> 572 * 573 * <p>See http://www.ietf.org/rfc/rfc4627.txt for further details. </p> 574 * 575 * <p>Example:</p> 576 * <pre> 577 * input string: He didn't say, "Stop!" 578 * output string: He didn't say, \"Stop!\" 579 * </pre> 580 * 581 * @param input String to escape values in, may be null 582 * @return String with escaped values, {@code null} if null string input 583 */ 584 public static final String escapeJson(final String input) { 585 return ESCAPE_JSON.translate(input); 586 } 587 588 /** 589 * <p>Unescapes any Java literals found in the {@code String}. 590 * For example, it will turn a sequence of {@code '\'} and 591 * {@code 'n'} into a newline character, unless the {@code '\'} 592 * is preceded by another {@code '\'}.</p> 593 * 594 * @param input the {@code String} to unescape, may be null 595 * @return a new unescaped {@code String}, {@code null} if null string input 596 */ 597 public static final String unescapeJava(final String input) { 598 return UNESCAPE_JAVA.translate(input); 599 } 600 601 /** 602 * <p>Unescapes any EcmaScript literals found in the {@code String}.</p> 603 * 604 * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'} 605 * into a newline character, unless the {@code '\'} is preceded by another 606 * {@code '\'}.</p> 607 * 608 * @see #unescapeJava(String) 609 * @param input the {@code String} to unescape, may be null 610 * @return A new unescaped {@code String}, {@code null} if null string input 611 */ 612 public static final String unescapeEcmaScript(final String input) { 613 return UNESCAPE_ECMASCRIPT.translate(input); 614 } 615 616 /** 617 * <p>Unescapes any Json literals found in the {@code String}.</p> 618 * 619 * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'} 620 * into a newline character, unless the {@code '\'} is preceded by another 621 * {@code '\'}.</p> 622 * 623 * @see #unescapeJava(String) 624 * @param input the {@code String} to unescape, may be null 625 * @return A new unescaped {@code String}, {@code null} if null string input 626 */ 627 public static final String unescapeJson(final String input) { 628 return UNESCAPE_JSON.translate(input); 629 } 630 631 // HTML and XML 632 //-------------------------------------------------------------------------- 633 /** 634 * <p>Escapes the characters in a {@code String} using HTML entities.</p> 635 * 636 * <p> 637 * For example: 638 * </p> 639 * <p><code>"bread" & "butter"</code></p> 640 * becomes: 641 * <p> 642 * <code>&quot;bread&quot; &amp; &quot;butter&quot;</code>. 643 * </p> 644 * 645 * <p>Supports all known HTML 4.0 entities, including funky accents. 646 * Note that the commonly used apostrophe escape character (&apos;) 647 * is not a legal entity and so is not supported). </p> 648 * 649 * @param input the {@code String} to escape, may be null 650 * @return a new escaped {@code String}, {@code null} if null string input 651 * 652 * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a> 653 * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a> 654 * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a> 655 * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a> 656 * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a> 657 */ 658 public static final String escapeHtml4(final String input) { 659 return ESCAPE_HTML4.translate(input); 660 } 661 662 /** 663 * <p>Escapes the characters in a {@code String} using HTML entities.</p> 664 * <p>Supports only the HTML 3.0 entities. </p> 665 * 666 * @param input the {@code String} to escape, may be null 667 * @return a new escaped {@code String}, {@code null} if null string input 668 */ 669 public static final String escapeHtml3(final String input) { 670 return ESCAPE_HTML3.translate(input); 671 } 672 673 //----------------------------------------------------------------------- 674 /** 675 * <p>Unescapes a string containing entity escapes to a string 676 * containing the actual Unicode characters corresponding to the 677 * escapes. Supports HTML 4.0 entities.</p> 678 * 679 * <p>For example, the string {@code "<Français>"} 680 * will become {@code "<Fran�ais>"}</p> 681 * 682 * <p>If an entity is unrecognized, it is left alone, and inserted 683 * verbatim into the result string. e.g. {@code ">&zzzz;x"} will 684 * become {@code ">&zzzz;x"}.</p> 685 * 686 * @param input the {@code String} to unescape, may be null 687 * @return a new unescaped {@code String}, {@code null} if null string input 688 */ 689 public static final String unescapeHtml4(final String input) { 690 return UNESCAPE_HTML4.translate(input); 691 } 692 693 /** 694 * <p>Unescapes a string containing entity escapes to a string 695 * containing the actual Unicode characters corresponding to the 696 * escapes. Supports only HTML 3.0 entities.</p> 697 * 698 * @param input the {@code String} to unescape, may be null 699 * @return a new unescaped {@code String}, {@code null} if null string input 700 */ 701 public static final String unescapeHtml3(final String input) { 702 return UNESCAPE_HTML3.translate(input); 703 } 704 705 /** 706 * <p>Escapes the characters in a {@code String} using XML entities.</p> 707 * 708 * <p>For example: {@code "bread" & "butter"} => 709 * {@code "bread" & "butter"}. 710 * </p> 711 * 712 * <p>Note that XML 1.0 is a text-only format: it cannot represent control 713 * characters or unpaired Unicode surrogate codepoints, even after escaping. 714 * {@code escapeXml10} will remove characters that do not fit in the 715 * following ranges:</p> 716 * 717 * <p>{@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p> 718 * 719 * <p>Though not strictly necessary, {@code escapeXml10} will escape 720 * characters in the following ranges:</p> 721 * 722 * <p>{@code [#x7F-#x84] | [#x86-#x9F]}</p> 723 * 724 * <p>The returned string can be inserted into a valid XML 1.0 or XML 1.1 725 * document. If you want to allow more non-text characters in an XML 1.1 726 * document, use {@link #escapeXml11(String)}.</p> 727 * 728 * @param input the {@code String} to escape, may be null 729 * @return a new escaped {@code String}, {@code null} if null string input 730 * @see #unescapeXml(java.lang.String) 731 */ 732 public static String escapeXml10(final String input) { 733 return ESCAPE_XML10.translate(input); 734 } 735 736 /** 737 * <p>Escapes the characters in a {@code String} using XML entities.</p> 738 * 739 * <p>For example: {@code "bread" & "butter"} => 740 * {@code "bread" & "butter"}. 741 * </p> 742 * 743 * <p>XML 1.1 can represent certain control characters, but it cannot represent 744 * the null byte or unpaired Unicode surrogate codepoints, even after escaping. 745 * {@code escapeXml11} will remove characters that do not fit in the following 746 * ranges:</p> 747 * 748 * <p>{@code [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p> 749 * 750 * <p>{@code escapeXml11} will escape characters in the following ranges:</p> 751 * 752 * <p>{@code [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]}</p> 753 * 754 * <p>The returned string can be inserted into a valid XML 1.1 document. Do not 755 * use it for XML 1.0 documents.</p> 756 * 757 * @param input the {@code String} to escape, may be null 758 * @return a new escaped {@code String}, {@code null} if null string input 759 * @see #unescapeXml(java.lang.String) 760 */ 761 public static String escapeXml11(final String input) { 762 return ESCAPE_XML11.translate(input); 763 } 764 765 //----------------------------------------------------------------------- 766 /** 767 * <p>Unescapes a string containing XML entity escapes to a string 768 * containing the actual Unicode characters corresponding to the 769 * escapes.</p> 770 * 771 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos). 772 * Does not support DTDs or external entities.</p> 773 * 774 * <p>Note that numerical \\u Unicode codes are unescaped to their respective 775 * Unicode characters. This may change in future releases. </p> 776 * 777 * @param input the {@code String} to unescape, may be null 778 * @return a new unescaped {@code String}, {@code null} if null string input 779 * @see #escapeXml10(String) 780 * @see #escapeXml11(String) 781 */ 782 public static final String unescapeXml(final String input) { 783 return UNESCAPE_XML.translate(input); 784 } 785 786 //----------------------------------------------------------------------- 787 788 /** 789 * <p>Returns a {@code String} value for a CSV column enclosed in double quotes, 790 * if required.</p> 791 * 792 * <p>If the value contains a comma, newline or double quote, then the 793 * String value is returned enclosed in double quotes.</p> 794 * 795 * <p>Any double quote characters in the value are escaped with another double quote.</p> 796 * 797 * <p>If the value does not contain a comma, newline or double quote, then the 798 * String value is returned unchanged.</p> 799 * 800 * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and 801 * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>. 802 * 803 * @param input the input CSV column String, may be null 804 * @return the input String, enclosed in double quotes if the value contains a comma, 805 * newline or double quote, {@code null} if null string input 806 */ 807 public static final String escapeCsv(final String input) { 808 return ESCAPE_CSV.translate(input); 809 } 810 811 /** 812 * <p>Returns a {@code String} value for an unescaped CSV column. </p> 813 * 814 * <p>If the value is enclosed in double quotes, and contains a comma, newline 815 * or double quote, then quotes are removed. 816 * </p> 817 * 818 * <p>Any double quote escaped characters (a pair of double quotes) are unescaped 819 * to just one double quote. </p> 820 * 821 * <p>If the value is not enclosed in double quotes, or is and does not contain a 822 * comma, newline or double quote, then the String value is returned unchanged.</p> 823 * 824 * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and 825 * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>. 826 * 827 * @param input the input CSV column String, may be null 828 * @return the input String, with enclosing double quotes removed and embedded double 829 * quotes unescaped, {@code null} if null string input 830 */ 831 public static final String unescapeCsv(final String input) { 832 return UNESCAPE_CSV.translate(input); 833 } 834 835 /** 836 * <p>Escapes the characters in a {@code String} using XSI rules.</p> 837 * 838 * <p><b>Beware!</b> In most cases you don't want to escape shell commands but use multi-argument 839 * methods provided by {@link java.lang.ProcessBuilder} or {@link java.lang.Runtime#exec(String[])} 840 * instead.</p> 841 * 842 * <p>Example:</p> 843 * <pre> 844 * input string: He didn't say, "Stop!" 845 * output string: He\ didn\'t\ say,\ \"Stop!\" 846 * </pre> 847 * 848 * @see <a href="http://pubs.opengroup.org/onlinepubs/7908799/xcu/chap2.html">Shell Command Language</a> 849 * @param input String to escape values in, may be null 850 * @return String with escaped values, {@code null} if null string input 851 */ 852 public static final String escapeXSI(final String input) { 853 return ESCAPE_XSI.translate(input); 854 } 855 856 /** 857 * <p>Unescapes the characters in a {@code String} using XSI rules.</p> 858 * 859 * @see StringEscapeUtils#escapeXSI(String) 860 * @param input the {@code String} to unescape, may be null 861 * @return a new unescaped {@code String}, {@code null} if null string input 862 */ 863 public static final String unescapeXSI(final String input) { 864 return UNESCAPE_XSI.translate(input); 865 } 866 867}