BurntSushi
diff --git a/‎Cargo.lock
+16 b/‎Cargo.lock
+16
diff --git a/‎Cargo.toml
+1 b/‎Cargo.toml
+1
diff --git a/‎README.md
+6-8 b/‎README.md
+6-8
diff --git a/‎doc/rg.1.md
+7 b/‎doc/rg.1.md
+7
diff --git a/‎src/app.rs
+12-2 b/‎src/app.rs
+12-2
diff --git a/‎src/args.rs
+32 b/‎src/args.rs
+32
@@ -29,6 +29,7 @@ path = "tests/tests.rs"
 atty = "0.2.2"
 bytecount = "0.1.4"
 clap = "2.20.5"
+encoding_rs = "0.5.0"
 env_logger = { version = "0.4", default-features = false }
 grep = { version = "0.1.5", path = "grep" }
 ignore = { version = "0.1.7", path = "ignore" }
 
@@ -83,6 +83,10 @@ increases the times to `3.081s` for ripgrep and `11.403s` for GNU grep.
   of search results, searching multiple patterns, highlighting matches with
   color and full Unicode support. Unlike GNU grep, `ripgrep` stays fast while
   supporting Unicode (which is always on).
+* `ripgrep` supports searching files in text encodings other than UTF-8, such
+  as UTF-16, latin-1, GBK, EUC-JP, Shift_JIS and more. (Some support for
+  automatically detecting UTF-16 is provided. Other text encodings must be
+  explicitly specified with the `-E/--encoding` flag.)
 
 In other words, use `ripgrep` if you like speed, filtering by default, fewer
 bugs and Unicode support.
@@ -101,18 +105,12 @@ give you a glimpse at some important downsides or missing features of
   support for Unicode categories (e.g., `\p{Sc}` to match currency symbols or
   `\p{Lu}` to match any uppercase letter). (Fancier regexes will never be
   supported.)
-* If you need to search files with text encodings other than UTF-8 (like
-  UTF-16), then `ripgrep` won't work. `ripgrep` will still work on ASCII
-  compatible encodings like latin1 or otherwise partially valid UTF-8.
-  `ripgrep` *can* search for arbitrary bytes though, which might work in
-  a pinch. (Likely to be supported in the future.)
 * `ripgrep` doesn't yet support searching compressed files. (Likely to be
   supported in the future.)
 * `ripgrep` doesn't have multiline search. (Unlikely to ever be supported.)
 
-In other words, if you like fancy regexes, non-UTF-8 character encodings,
-searching compressed files or multiline search, then `ripgrep` may not quite
-meet your needs (yet).
+In other words, if you like fancy regexes, searching compressed files or
+multiline search, then `ripgrep` may not quite meet your needs (yet).
 
 ### Is it really faster than everything else?
 
 
@@ -136,6 +136,13 @@ Project home page: https://github.com/BurntSushi/ripgrep
 --debug
 : Show debug messages.
 
+-E, --encoding *ENCODING*
+: Specify the text encoding that ripgrep will use on all files
+  searched. The default value is 'auto', which will cause ripgrep to do
+  a best effort automatic detection of encoding on a per-file basis.
+  Other supported values can be found in the list of labels here:
+  https://encoding.spec.whatwg.org/#concept-encoding-get
+
 -f, --file FILE ...
 : Search for patterns from the given file, with one pattern per line. When this
   flag is used multiple times or in combination with the -e/--regexp flag,
 
@@ -96,6 +96,8 @@ fn app<F>(next_line_help: bool, doc: F) -> App<'static, 'static>
              .possible_values(&["never", "auto", "always", "ansi"]))
         .arg(flag("colors").value_name("SPEC")
              .takes_value(true).multiple(true).number_of_values(1))
+        .arg(flag("encoding").short("E").value_name("ENCODING")
+             .takes_value(true).number_of_values(1))
         .arg(flag("fixed-strings").short("F"))
         .arg(flag("glob").short("g")
              .takes_value(true).multiple(true).number_of_values(1)
@@ -251,6 +253,14 @@ lazy_static! {
               change the match color to magenta and the background color for \
               line numbers to yellow:\n\n\
               rg --colors 'match:fg:magenta' --colors 'line:bg:yellow' foo.");
+        doc!(h, "encoding",
+             "Specify the text encoding of files to search.",
+             "Specify the text encoding that ripgrep will use on all files \
+              searched. The default value is 'auto', which will cause ripgrep \
+              to do a best effort automatic detection of encoding on a \
+              per-file basis. Other supported values can be found in the list \
+              of labels here: \
+              https://encoding.spec.whatwg.org/#concept-encoding-get");
         doc!(h, "fixed-strings",
              "Treat the pattern as a literal string.",
              "Treat the pattern as a literal string instead of a regular \
@@ -335,9 +345,9 @@ lazy_static! {
               provided are searched. Empty pattern lines will match all input \
               lines, and the newline is not counted as part of the pattern.");
         doc!(h, "files-with-matches",
-             "Only show the path of each file with at least one match.");
+             "Only show the paths with at least one match.");
         doc!(h, "files-without-match",
-             "Only show the path of each file that contains zero matches.");
+             "Only show the paths that contains zero matches.");
         doc!(h, "with-filename",
              "Show file name for each match.",
              "Prefix each match with the file name that contains it. This is \
 
@@ -10,6 +10,7 @@ use std::sync::Arc;
 use std::sync::atomic::{AtomicBool, Ordering};
 
 use clap;
+use encoding_rs::Encoding;
 use env_logger;
 use grep::{Grep, GrepBuilder};
 use log;
@@ -41,6 +42,7 @@ pub struct Args {
     column: bool,
     context_separator: Vec<u8>,
     count: bool,
+    encoding: Option<&'static Encoding>,
     files_with_matches: bool,
     files_without_matches: bool,
     eol: u8,
@@ -224,6 +226,7 @@ impl Args {
             .after_context(self.after_context)
             .before_context(self.before_context)
             .count(self.count)
+            .encoding(self.encoding)
             .files_with_matches(self.files_with_matches)
             .files_without_matches(self.files_without_matches)
             .eol(self.eol)
@@ -330,6 +333,7 @@ impl<'a> ArgMatches<'a> {
             column: self.column(),
             context_separator: self.context_separator(),
             count: self.is_present("count"),
+            encoding: try!(self.encoding()),
             files_with_matches: self.is_present("files-with-matches"),
             files_without_matches: self.is_present("files-without-match"),
             eol: b'\n',
@@ -569,13 +573,18 @@ impl<'a> ArgMatches<'a> {
     /// will need to search.
     fn mmap(&self, paths: &[PathBuf]) -> Result<bool> {
         let (before, after) = try!(self.contexts());
+        let enc = try!(self.encoding());
         Ok(if before > 0 || after > 0 || self.is_present("no-mmap") {
             false
         } else if self.is_present("mmap") {
             true
         } else if cfg!(target_os = "macos") {
             // On Mac, memory maps appear to suck. Neat.
             false
+        } else if enc.is_some() {
+            // There's no practical way to transcode a memory map that isn't
+            // isomorphic to searching over io::Read.
+            false
         } else {
             // If we're only searching a few paths and all of them are
             // files, then memory maps are probably faster.
@@ -721,6 +730,29 @@ impl<'a> ArgMatches<'a> {
         Ok(ColorSpecs::new(&specs))
     }
 
+    /// Resolve the value of the `encoding` flag into a text encoding.
+    ///
+    /// `Ok(None)` is returned when no value was given or when the value is
+    /// exactly `auto`; callers treat `None` as "detect the encoding
+    /// automatically on a per-file basis".
+    ///
+    /// Any other value is looked up as an encoding label. If the label
+    /// doesn't correspond to a supported encoding, an error naming the
+    /// offending label is returned.
+    fn encoding(&self) -> Result<Option<&'static Encoding>> {
+        // No value at all behaves the same as an explicit `auto`.
+        let label = match self.0.value_of_lossy("encoding") {
+            Some(label) => label,
+            None => return Ok(None),
+        };
+        if label == "auto" {
+            return Ok(None);
+        }
+        if let Some(enc) = Encoding::for_label(label.as_bytes()) {
+            Ok(Some(enc))
+        } else {
+            Err(From::from(format!("unsupported encoding: {}", label)))
+        }
+    }
+
     /// Returns the approximate number of threads that ripgrep should use.
     fn threads(&self) -> Result<usize> {
         if self.is_present("sort-files") {