Browse Source

Use traits for any input format

Arjun Barrett 4 năm trước
mục cha
commit
e2489ba42e
4 tập tin đã thay đổi với 116 bổ sung và 36 xóa
  1. 3 1
      README.md
  2. 1 0
      rs/Cargo.toml
  3. 104 31
      rs/src/lib.rs
  4. 8 4
      rs/src/main.rs

+ 3 - 1
README.md

@@ -343,7 +343,9 @@ Note that there exist some small libraries like [`tiny-inflate`](https://npmjs.c
 
 So what makes `fflate` different? It takes the brilliant innovations of `UZIP.js` and optimizes them while adding direct support for GZIP and Zlib data. And unlike all of the above libraries, it uses ES Modules to allow for partial builds through tree shaking, meaning that it can rival even `tiny-inflate` in size while maintaining excellent performance. The end result is a library that, in total, weighs 8kB minified for the core build (3kB for decompression only and 5kB for compression only), is about 15% faster than `UZIP.js` or up to 60% faster than `pako`, and achieves the same or better compression ratio than the rest.
 
-Before you decide that `fflate` is the end-all compression library, you should note that JavaScript simply cannot rival the performance of a compiled language. If you're willing to have 160 kB of extra weight and [much less browser support](https://caniuse.com/wasm), you can achieve  more performance than `fflate` with a WASM build of Zlib like [`wasm-flate`](https://www.npmjs.com/package/wasm-flate). And if you're only using Node.js, just use the [native Zlib bindings](https://nodejs.org/api/zlib.html) that offer the best performance. Though note that even against these compiled libraries, `fflate` is only around 30% slower in decompression and 10% slower in compression, and can still achieve better compression ratios!
+If you're willing to have 160 kB of extra weight and [much less browser support](https://caniuse.com/wasm), you could theoretically achieve more performance than `fflate` with a WASM build of Zlib like [`wasm-flate`](https://www.npmjs.com/package/wasm-flate). However, per some tests I conducted, the WASM interpreters of major browsers are not fast enough as of December 2020 for `wasm-flate` to be useful: `fflate` is around 2x faster.
+
+Before you decide that `fflate` is the end-all compression library, you should note that JavaScript simply cannot rival the performance of a native program. If you're only using Node.js, use the [native Zlib bindings](https://nodejs.org/api/zlib.html) that offer the best performance. Though note that even against Zlib, `fflate` is only around 30% slower in decompression and 10% slower in compression, and can still achieve better compression ratios!
 
 ## Browser support
 `fflate` makes heavy use of typed arrays (`Uint8Array`, `Uint16Array`, etc.). Typed arrays can be polyfilled at the cost of performance, but the most recent browser that doesn't support them [is from 2011](https://caniuse.com/typedarrays), so I wouldn't bother.

+ 1 - 0
rs/Cargo.toml

@@ -9,6 +9,7 @@ lazy_static = "^1.4.0"
 
 [profile.release]
 opt-level = "s"
+lto = true
 
 [lib]
 crate-type = ["cdylib", "rlib"]

+ 104 - 31
rs/src/lib.rs

@@ -60,18 +60,18 @@ fn freb(b: &[u16], r: &mut [u32]) {
 
 // hmap base
 fn hmb(cd: &[u8], mb: u8, le: &mut [u16]) {
-    let mut l = [0u16; 16];
+    let t = (mb + 1) as usize;
+    for i in 1..t {
+        le[i] = 0;
+    }
     for &cl in cd {
-        l[cl as usize] += 1;
+        le[cl as usize] += 1;
     }
     let mut v = 0;
-    let t = (mb + 1) as usize;
     for i in 1..t {
+        let val = le[i];
         le[i] = v;
-        v = (v + l[i]) << 1;
-    }
-    for i in t..15 {
-        le[i] = 0;
+        v = (v + val) << 1;
     }
 }
 
@@ -92,14 +92,17 @@ fn hrmap(cd: &[u8], mb: u8, co: &mut [u16], le: &mut [u16]) {
     for i in 0..cd.len() {
         let cl = cd[i] as usize;
         if cl != 0 {
-            let sv = ((i as u16) << 4) | cl as u16;
             let r = mbu - cl;
             let v = (le[cl] << r) as usize;
-            le[cl] += 1;
             let m = v + (1 << r);
+            let sv = if cl != 0 {
+                le[cl] += 1;
+                ((i as u16) << 4) | cl as u16
+            } else {
+                0
+            };
             for j in v..m {
-                let ind = rev[j] >> rvb;
-                co[ind] = sv;
+                co[rev[j] >> rvb] = sv;
             }
         }
     }
@@ -132,18 +135,18 @@ lazy_static! {
         hmap(&flt, 9, &mut v, &mut [0u16; 16]);
         v
     };
-    static ref flrm: [u16; 511] = {
-        let mut v: [u16; 511] = [0u16; 511];
+    static ref flrm: [u16; 512] = {
+        let mut v = [0u16; 512];
         hrmap(&flt, 9, &mut v, &mut [0u16; 16]);
         v
     };
-    static ref fdm: [u16; 31] = {
-        let mut v = [0u16; 31];
+    static ref fdm: [u16; 32] = {
+        let mut v = [0u16; 32];
         hmap(&fdt, 5, &mut v, &mut [0u16; 16]);
         v
     };
-    static ref fdrm: [u16; 31] = {
-        let mut v: [u16; 31] = [0u16; 31];
+    static ref fdrm: [u16; 32] = {
+        let mut v = [0u16; 32];
         hrmap(&fdt, 5, &mut v, &mut [0u16; 16]);
         v
     };
@@ -200,7 +203,77 @@ pub enum InflateError {
     InvalidDistance
 }
 
-fn inflt(dat: &[u8], buf: &mut Vec<u8>, st: &mut InflateState) -> Result<(), InflateError> {
+pub trait OutputBuffer {
+    fn w(self: &mut Self, value: u8);
+    fn wall(self: &mut Self, slice: &[u8]) {
+        for &value in slice {
+            self.w(value);
+        }
+    }
+    fn palloc(self: &mut Self, extra_bytes: usize);
+    fn back(self: &Self, back: usize) -> u8;
+}
+
+impl OutputBuffer for Vec<u8> {
+    #[inline(always)]
+    fn w(self: &mut Self, value: u8) {
+        self.push(value);
+    }
+    #[inline(always)]
+    fn wall(self: &mut Self, slice: &[u8]) {
+        self.extend(slice.iter());
+    }
+    #[inline(always)]
+    fn palloc(self: &mut Self, extra_bytes: usize) {
+        self.reserve(extra_bytes);
+    }
+    #[inline(always)]
+    fn back(self: &Self, back: usize) -> u8 {
+        self[self.len() - back]
+    }
+}
+
+pub struct SliceOutputBuffer<'a > {
+    buf: &'a mut [u8],
+    byte: usize
+}
+
+impl<'a> SliceOutputBuffer<'a> {
+    #[inline(always)]
+    pub fn new(slice: &'a mut [u8]) -> SliceOutputBuffer<'a> {
+        SliceOutputBuffer {
+            buf: slice,
+            byte: 0
+        }
+    }
+}
+
+impl<'a> OutputBuffer for SliceOutputBuffer<'a> {
+    #[inline(always)]
+    fn w(self: &mut Self, value: u8) {
+        if self.byte < self.buf.len() {
+            self.buf[self.byte] = value;
+        }
+        self.byte += 1;
+    }
+    #[inline(always)]
+    fn wall(self: &mut Self, slice: &[u8]) {
+        let sl = slice.len();
+        let end = self.byte + sl;
+        if end <= self.buf.len() {
+            self.buf[self.byte..end].copy_from_slice(slice);
+        }
+        self.byte = end;
+    }
+    #[inline(always)]
+    fn palloc(self: &mut Self, _eb: usize) {}
+    #[inline(always)]
+    fn back(self: &Self, back: usize) -> u8 {
+        self.buf[self.byte - back]
+    }
+}
+
+fn inflt(dat: &[u8], buf: &mut dyn OutputBuffer, st: &mut InflateState) -> Result<(), InflateError> {
     let mut pos = st.pos;
     let sl = dat.len();
     if st.bfinal && st.head { return Ok(()) };
@@ -223,12 +296,12 @@ fn inflt(dat: &[u8], buf: &mut Vec<u8>, st: &mut InflateState) -> Result<(), Inf
                         }
                         break;
                     }
-                    buf.extend(dat[s..t].iter());
+                    buf.wall(&dat[s..t]);
                     continue;
                 }
                 1 => {
-                    st.lmap.copy_from_slice(&*flrm);
-                    st.dmap.copy_from_slice(&*fdrm);
+                    st.lmap[..512].copy_from_slice(&*flrm);
+                    st.dmap[..32].copy_from_slice(&*fdrm);
                     st.lbits = 9;
                     st.dbits = 5;
                 }
@@ -315,7 +388,7 @@ fn inflt(dat: &[u8], buf: &mut Vec<u8>, st: &mut InflateState) -> Result<(), Inf
             }
             let sym = c >> 4;
             if (sym >> 8) == 0 {
-                buf.push(sym as u8);
+                buf.w(sym as u8);
             } else if sym == 256 {
                 st.head = true;
                 break;
@@ -343,7 +416,7 @@ fn inflt(dat: &[u8], buf: &mut Vec<u8>, st: &mut InflateState) -> Result<(), Inf
                     return Err(InflateError::UnexpectedEOF);
                 }
                 while add != 0 {
-                    buf.push(buf[buf.len() - dt]);
+                    buf.w(buf.back(dt));
                     add -= 1;
                 }
             }
@@ -356,12 +429,12 @@ fn inflt(dat: &[u8], buf: &mut Vec<u8>, st: &mut InflateState) -> Result<(), Inf
     Ok(())
 }
 
-pub fn inflate(dat: &[u8]) -> Result<Vec<u8>, InflateError> {
-    let mut v = Vec::with_capacity(dat.len() * 3);
-    let mut lmap = [0u16; 32768];
-    let mut dmap = [0u16; 32768];
-    let mut clmap = [0u16; 128];
-    let mut le = [0u16; 16];
+pub fn inflate(dat: &[u8], out: &mut dyn OutputBuffer) -> Result<(), InflateError> {
+    out.palloc(dat.len() * 3);
+    let mut buf = [0u16; 65680];
+    let (mut lmap, buf) = buf.split_at_mut(32768);
+    let (mut dmap, buf) = buf.split_at_mut(32768);
+    let (mut clmap, mut le) = buf.split_at_mut(128);
     let mut st = InflateState {
         lmap: &mut lmap,
         dmap: &mut dmap,
@@ -374,6 +447,6 @@ pub fn inflate(dat: &[u8]) -> Result<Vec<u8>, InflateError> {
         last: true,
         head: true
     };
-    inflt(dat, &mut v, &mut st)?;
-    Ok(v)
+    inflt(dat, out, &mut st)?;
+    Ok(())
 }

+ 8 - 4
rs/src/main.rs

@@ -9,16 +9,20 @@ mod lib;
 fn main() {
     let args: Vec<String> = args().collect();
     // Assumes run in root dir - good enough for a test
-    let fp_base = String::from("../test/data/largeImage");
+    let fp_base = String::from("./test");
     let fp = &args.get(1).unwrap_or(&fp_base);
     let mut f = File::open(fp).unwrap();
     let mut v: Vec<u8> = Vec::new();
     if f.read_to_end(&mut v).is_ok() {
         for _ in 0..5 {
             let now = Instant::now();
-            let out = lib::inflate(&v);
-            let el = now.elapsed();
-            println!("{}.{:06}s {:?}", el.as_secs(), el.as_nanos() / 1000, out.unwrap().len());
+            let mut buf = Vec::with_capacity(52344054);
+            unsafe { buf.set_len(52344054); }
+            let mut out = lib::SliceOutputBuffer::new(&mut buf);
+            if let Ok(()) = lib::inflate(&v, &mut out) {
+                let el = now.elapsed();
+                println!("{}.{:06}s {:?}", el.as_secs(), el.as_nanos() / 1000, buf.len());
+            };
         }
         // for _ in 0..5 {
         //     let now = Instant::now();