1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145
use std::io::{self, Write};
use std::char;
use io_support::{write_char};
use entities::*;
///
/// HTML entity-encode a string.
///
/// Entity-encodes a string with a minimal set of entities. The following characters are
/// replaced by the entity registered for them in `MINIMAL_ENTITIES`:
///
/// - `"`
/// - `&`
/// - `'`
/// - `<`
/// - `>`
///
/// All other characters are passed through to the output as they are.
///
/// # Arguments
/// - `s` - The string to encode.
///
/// # Return value
/// The encoded string.
///
/// # Panics
/// Panics if `encode_minimal_w` returns an error or emits bytes that are not valid UTF-8.
/// Neither is expected, since the output goes to an in-memory `Vec<u8>`.
///
/// # Example
/// ~~~
/// let encoded = htmlescape::encode_minimal("<em>Hej!</em>");
/// assert_eq!(&encoded, "&lt;em&gt;Hej!&lt;/em&gt;");
/// ~~~
///
/// # Safety notes
/// Using the function to encode an untrusted string that is to be used as a HTML attribute value
/// may lead to XSS vulnerabilities. Consider the following example:
///
/// ~~~
/// let name = "dummy onmouseover=alert(/XSS/)"; // User input
/// let tag = format!("<option value={}>", htmlescape::encode_minimal(name));
/// // Here `tag` is "<option value=dummy onmouseover=alert(/XSS/)>"
/// ~~~
///
/// Use `encode_attribute` for escaping HTML attribute values.
pub fn encode_minimal(s: &str) -> String {
    // Heuristic pre-allocation: slightly more than the input length, leaving
    // room for a few entities before the buffer has to grow.
    let mut writer = Vec::with_capacity((s.len() / 3 + 1) * 4);
    encode_minimal_w(s, &mut writer).expect("writing to an in-memory Vec<u8> cannot fail");
    String::from_utf8(writer).expect("impossible invalid UTF-8 in output")
}
///
/// HTML entity-encode a string to a writer.
///
/// Similar to `encode_minimal`, except that the output is written to a writer rather
/// than returned as a `String`.
///
/// # Arguments
/// - `s` - The string to encode.
/// - `writer` - Output is written to here.
pub fn encode_minimal_w<W: Write>(s: &str, writer: &mut W) -> io::Result<()> {
for c in s.chars() {
match get_entity(c) {
None => try!(write_char(writer, c)),
Some(entity) => try!(writer.write_all(entity.as_bytes()))
}
}
Ok(())
}
///
/// HTML entity-encodes a string for use in attributes values.
///
/// Entity-encodes a string using an extensive set of entities, giving a string suitable for use
/// in HTML attribute values. All entities from `encode_minimal` are used, and further, every
/// other character below U+0100 that is not an ASCII letter or digit is hex-encoded
/// (`&#x__;`, two upper-case hex digits). Characters from U+0100 upwards are left as they are.
/// See the [OWASP XSS Prevention Cheat Sheet](
/// https://www.owasp.org/index.php/XSS_(Cross_Site_Scripting)_Prevention_Cheat_Sheet) for more
/// information on entity-encoding for attribute values.
///
/// # Arguments
/// - `s` - The string to encode.
///
/// # Return value
/// The encoded string.
///
/// # Panics
/// Panics if `encode_attribute_w` returns an error or emits bytes that are not valid UTF-8.
/// Neither is expected, since the output goes to an in-memory `Vec<u8>`.
///
/// # Example
/// ~~~
/// let encoded = htmlescape::encode_attribute("\"No\", he said.");
/// assert_eq!(&encoded, "&quot;No&quot;&#x2C;&#x20;he&#x20;said&#x2E;");
/// ~~~
pub fn encode_attribute(s: &str) -> String {
    // Pre-allocate generously: many characters expand to a multi-byte reference.
    let mut writer = Vec::with_capacity(s.len() * 3);
    encode_attribute_w(s, &mut writer).expect("writing to an in-memory Vec<u8> cannot fail");
    String::from_utf8(writer).expect("impossible invalid UTF-8 in output")
}
///
/// HTML entity-encodes a string, for use in attributes values, to a writer.
///
/// Similar to `encode_attribute`, except that the output is written to a writer rather
/// than returned as a `String`.
///
/// # Arguments
/// - `s` - The string to encode.
/// - `writer` - Output is written to here.
///
/// # Errors
/// Returns the first I/O error reported while writing to `writer`; output written before
/// the failure is not rolled back.
pub fn encode_attribute_w<W: Write>(s: &str, writer: &mut W) -> io::Result<()> {
    for c in s.chars() {
        if let Some(entity) = get_entity(c) {
            // Characters with a registered entity always use it.
            writer.write_all(entity.as_bytes())?;
        } else if (c as u32) < 256 && !is_ascii_alnum(c) {
            // Everything below U+0100 except ASCII letters and digits becomes a
            // two-digit hex reference. Code points 128..=255 are never ASCII
            // alphanumeric, so they need no separate range check.
            write_hex(writer, c)?;
        } else {
            // ASCII letters/digits and characters from U+0100 up are written as is.
            write_char(writer, c)?;
        }
    }
    Ok(())
}
/// Looks up the entity string registered for `c` in `MINIMAL_ENTITIES`.
///
/// Returns `Some(entity)` when the table has an entry for `c`, `None` otherwise.
/// The lookup is a binary search, so it assumes `MINIMAL_ENTITIES` is sorted by
/// character (the table is defined outside this file; confirm there).
fn get_entity(c: char) -> Option<&'static str> {
    MINIMAL_ENTITIES
        .binary_search_by(|entry| entry.0.cmp(&c))
        .ok()
        .map(|found| MINIMAL_ENTITIES[found].1)
}
/// Writes `c` to `writer` as a two-digit hexadecimal character reference, `&#xNN;`.
///
/// The digits are upper-case. Only the low 8 bits of the code point are encoded;
/// the visible caller passes code points below 256 only, so nothing is lost there.
///
/// # Errors
/// Returns any I/O error reported by `writer`.
fn write_hex<W: Write>(writer: &mut W, c: char) -> io::Result<()> {
    const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";
    // Intentional narrowing: see the note on the 8-bit range above.
    let n = c as u8;
    let encoded = [
        b'&',
        b'#',
        b'x',
        HEX_DIGITS[(n >> 4) as usize],
        HEX_DIGITS[(n & 0x0F) as usize],
        b';',
    ];
    // `write_all` retries short writes. A plain `write` may accept only part of
    // the buffer, which would silently truncate the `&#x` prefix.
    writer.write_all(&encoded)
}
/// Returns `true` when `c` is an ASCII letter (`a`-`z`, `A`-`Z`) or an ASCII digit (`0`-`9`).
fn is_ascii_alnum(c: char) -> bool {
    c.is_ascii_alphanumeric()
}