Преглед на файлове

Make streaming inflate consume all data

Arjun Barrett преди 4 години
родител
ревизия
c591757bd9
променени са 5 файла, в които са добавени 154 реда и са изтрити 101 реда
  1. 3 0
      CHANGELOG.md
  2. 1 1
      package.json
  3. 2 1
      rs/fflate/Cargo.toml
  4. 122 83
      rs/fflate/src/lib.rs
  5. 26 16
      src/index.ts

+ 3 - 0
CHANGELOG.md

@@ -1,3 +1,6 @@
+## 0.6.4
+- Made streaming inflate consume all data possible
+- Optimized use of values near 32-bit boundary
 ## 0.6.3
 - Patch exports of async functions
 - Fix streaming unzip

+ 1 - 1
package.json

@@ -1,6 +1,6 @@
 {
   "name": "fflate",
-  "version": "0.6.3",
+  "version": "0.6.4",
   "description": "High performance (de)compression in an 8kB package",
   "main": "./lib/index.cjs",
   "module": "./esm/browser.js",

+ 2 - 1
rs/fflate/Cargo.toml

@@ -21,9 +21,10 @@ edition = "2018"
 
 [dependencies]
 lazy_static = "1.4"
+miniz_oxide = "*"
 
 [profile.release]
-opt-level = "s"
+opt-level = 3
 lto = true
 
 [features]

+ 122 - 83
rs/fflate/src/lib.rs

@@ -5,11 +5,11 @@
 // Instead of trying to read this code, check out the TypeScript version
 
 #![allow(non_upper_case_globals)]
-#![cfg_attr(not(feature = "std"), no_std)]
+// #![cfg_attr(not(feature = "std"), no_std)]
 use lazy_static::lazy_static;
 
-#[cfg(feature = "std")]
-use std::{vec::Vec, io::{Read, Write, Error, ErrorKind}};
+// #[cfg(feature = "std")]
+use std::{vec::Vec, io::{Read, Write, Error, ErrorKind}, ops::Range};
 
 const fleb: [usize; 32] = [
     0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 0, 0, 0,
@@ -63,11 +63,11 @@ fn freb(b: &[u16], r: &mut [u32]) {
 // hmap base
 fn hmb(cd: &[u8], mb: u8, le: &mut [u16]) {
     let t = (mb + 1) as usize;
-    for i in 1..t {
-        le[i] = 0;
-    }
+    le.iter_mut().for_each(|v| *v = 0);
     for &cl in cd {
-        le[cl as usize] += 1;
+        if cl != 0 {
+            le[cl as usize] += 1;
+        }
     }
     let mut v = 0;
     for i in 1..t {
@@ -93,17 +93,12 @@ fn hrmap(cd: &[u8], mb: u8, co: &mut [u16], le: &mut [u16]) {
     let mbu = mb as usize;
     for i in 0..cd.len() {
         let cl = cd[i] as usize;
-        // TODO: remove cond
         if cl != 0 {
             let r = mbu - cl;
             let v = (le[cl] << r) as usize;
+            le[cl] += 1;
             let m = v + (1 << r);
-            let sv = if cl != 0 {
-                le[cl] += 1;
-                ((i as u16) << 4) | cl as u16
-            } else {
-                0
-            };
+            let sv = ((i as u16) << 4) | cl as u16;
             for j in v..m {
                 co[rev[j] >> rvb] = sv;
             }
@@ -112,7 +107,7 @@ fn hrmap(cd: &[u8], mb: u8, co: &mut [u16], le: &mut [u16]) {
 }
 
 lazy_static! {
-    static ref revfl: [u32; 261]= {
+    static ref revfl: [u32; 261] = {
         let mut v = [0u32; 261];
         freb(&fl, &mut v);
         v[258] = 28;
@@ -156,26 +151,27 @@ lazy_static! {
 }
 
 #[inline(always)]
-fn byte(dat: &[u8], bpos: usize) -> u8 {
-   if bpos < dat.len() {
-       dat[bpos]
-   } else {
-       0
-   }
+fn mbits(dat: &[u8], pos: usize, mask: u8) -> u8 {
+    (dat[pos >> 3] >> (pos & 7)) & mask
+}
+
+fn mbits16(dat: &[u8], pos: usize, mask: u16) -> u16 {
+    let b = pos >> 3;
+    ((dat[b] as u16 | ((dat[b + 1] as u16) << 8)) >> (pos & 7)) & mask
 }
 
 #[inline(always)]
 fn bits(dat: &[u8], pos: usize, mask: u8) -> u8 {
     let b = pos >> 3;
-    ((byte(dat, b) as u16 | ((byte(dat, b + 1) as u16) << 8)) >> (pos & 7)) as u8 & mask
+    ((dat[b] as u16 | ((dat[b + 1] as u16) << 8)) >> (pos & 7)) as u8 & mask
 }
 
 #[inline(always)]
 fn bits16(dat: &[u8], pos: usize, mask: u16) -> u16 {
     let b = pos >> 3;
-    ((byte(dat, b) as u32
-        | ((byte(dat, b + 1) as u32) << 8)
-        | ((byte(dat, b + 2) as u32) << 16))
+    ((dat[b] as u32
+        | ((dat[b + 1] as u32) << 8)
+        | ((dat[b + 2] as u32) << 16))
         >> (pos & 7)) as u16
         & mask
 }
@@ -234,7 +230,7 @@ pub enum InflateError {
     InvalidDistance
 }
 
-#[cfg(feature = "std")]
+// #[cfg(feature = "std")]
 impl From<InflateError> for Error {
     fn from(error: InflateError) -> Self {
         Error::new(match error {
@@ -249,6 +245,16 @@ impl From<InflateError> for Error {
     }
 }
 
+fn max(dat: &[u8]) -> u8 {
+    let mut m = 0;
+    for &v in dat {
+        if v > m {
+            m = v;
+        }
+    }
+    m
+}
+
 pub trait OutputBuffer {
     fn write(&mut self, value: u8);
     fn write_all(&mut self, slice: &[u8]) {
@@ -260,18 +266,18 @@ pub trait OutputBuffer {
     fn back(&self, back: usize) -> u8;
 }
 
-#[cfg(feature = "std")]
+// #[cfg(feature = "std")]
 impl OutputBuffer for Vec<u8> {
     #[inline(always)]
-    fn w(&mut self, value: u8) {
+    fn write(&mut self, value: u8) {
         self.push(value);
     }
     #[inline(always)]
-    fn wall(&mut self, slice: &[u8]) {
+    fn write_all(&mut self, slice: &[u8]) {
         self.extend(slice.iter());
     }
     #[inline(always)]
-    fn palloc(&mut self, extra_bytes: usize) {
+    fn pre_alloc(&mut self, extra_bytes: usize) {
         self.reserve(extra_bytes);
     }
     #[inline(always)]
@@ -325,6 +331,8 @@ fn inflt(dat: &[u8], buf: &mut dyn OutputBuffer, st: &mut InflateState) -> Resul
     let sl = dat.len();
     if sl == 0 || (st.head && sl < 5) { return Ok(()); }
     let tbts = sl << 3;
+    let tbts1 = tbts - 8;
+    let tbts2 = tbts1 - 8;
     loop {
         if st.head {
             st.bfinal = bits(dat, pos, 1) != 0;
@@ -333,8 +341,7 @@ fn inflt(dat: &[u8], buf: &mut dyn OutputBuffer, st: &mut InflateState) -> Resul
             match btype {
                 0 => {
                     let s = shft(pos) + 4;
-                    let l = dat[s - 4] as u16 | ((dat[s - 3] as u16) << 8);
-                    let t = s + l as usize;
+                    let t = s + (dat[s - 4] as u16 | ((dat[s - 3] as u16) << 8)) as usize;
                     if t > dat.len() {
                         if st.last {
                             return Err(InflateError::UnexpectedEOF);
@@ -362,7 +369,7 @@ fn inflt(dat: &[u8], buf: &mut dyn OutputBuffer, st: &mut InflateState) -> Resul
                     for i in hclen..19 {
                         st.clt[clim[i]] = 0;
                     }
-                    let clb = *st.clt.iter().max().unwrap();
+                    let clb = max(&st.clt);
                     let clbmsk = (1 << clb) - 1;
                     if !st.last && pos + tl * (clb + 7) as usize > tbts {
                         break;
@@ -405,8 +412,8 @@ fn inflt(dat: &[u8], buf: &mut dyn OutputBuffer, st: &mut InflateState) -> Resul
                     }
                     let lt = &st.ldt[0..hlit];
                     let dt = &st.ldt[hlit..tl];
-                    st.lbits = *lt.iter().max().unwrap();
-                    st.dbits = *dt.iter().max().unwrap();
+                    st.lbits = max(lt);
+                    st.dbits = max(dt);
                     hrmap(lt, st.lbits, &mut st.lmap, &mut st.le);
                     hrmap(dt, st.dbits, &mut st.dmap, &mut st.le);
                 }
@@ -419,15 +426,32 @@ fn inflt(dat: &[u8], buf: &mut dyn OutputBuffer, st: &mut InflateState) -> Resul
             }
         }
         st.head = false;
-        let lms = (1 << st.lbits) - 1;
-        let dms = (1 << st.dbits) - 1;
-        let mxa = (st.lbits + st.dbits + 18) as usize;
-        while st.last || pos + mxa < tbts {
-            let c = st.lmap[bits16(dat, pos, lms) as usize];
+        let lms = (1u16 << st.lbits) - 1;
+        let lms8 = lms as u8;
+        let dms = (1u16 << st.dbits) - 1;
+        let dms8 = dms as u8;
+        let topl = tbts - st.lbits as usize;
+        let topd = tbts - st.dbits as usize;
+        let top = tbts - (st.lbits + st.dbits + 18) as usize;
+        while st.last || pos < top {
+            let c = st.lmap[
+                if pos > topl {
+                    return Err(InflateError::UnexpectedEOF);
+                } else if st.lbits < 10 {
+                    if pos > tbts1 {
+                        mbits(dat, pos, lms8) as usize
+                    } else {
+                        bits(dat, pos, lms8) as usize
+                    }
+                } else {
+                    if pos > tbts2 {
+                        mbits16(dat, pos, lms) as usize
+                    } else {
+                        bits16(dat, pos, lms) as usize
+                    }
+                }
+            ];
             pos += (c & 15) as usize;
-            if pos > tbts {
-                return Err(InflateError::UnexpectedEOF);
-            }
             if c == 0 {
                 return Err(InflateError::InvalidLengthOrLiteral);
             }
@@ -440,12 +464,28 @@ fn inflt(dat: &[u8], buf: &mut dyn OutputBuffer, st: &mut InflateState) -> Resul
             } else {
                 let mut add = sym - 254;
                 if add > 10 {
-                    let i = (add as usize) - 3;
+                    let i = add as usize - 3;
                     let b = fleb[i];
-                    add = bits(dat, pos, (1 << b) - 1) as u16 + fl[i as usize];
+                    add = bits(dat, pos, (1 << b) - 1) as u16 + fl[i];
                     pos += b;
                 }
-                let d = st.dmap[bits16(dat, pos, dms) as usize];
+                let d = st.dmap[
+                    if pos > topd {
+                        return Err(InflateError::UnexpectedEOF);
+                    } else if st.dbits < 10 {
+                        if pos > tbts1 {
+                            mbits(dat, pos, dms8) as usize
+                        } else {
+                            bits(dat, pos, dms8) as usize
+                        }
+                    } else {
+                        if pos > tbts2 {
+                            mbits16(dat, pos, dms) as usize
+                        } else {
+                            bits16(dat, pos, dms) as usize
+                        }
+                    }
+                ];
                 if d == 0 {
                     return Err(InflateError::InvalidDistance);
                 }
@@ -475,46 +515,45 @@ fn inflt(dat: &[u8], buf: &mut dyn OutputBuffer, st: &mut InflateState) -> Resul
 }
 
 pub fn inflate(dat: &[u8], out: &mut dyn OutputBuffer) -> Result<(), InflateError> {
-    out.pre_alloc(dat.len() * 3);
     let mut st = InflateState::new();
     st.last = true;
     inflt(dat, out, &mut st)?;
     Ok(())
 }
 
-pub struct Inflate<'a> {
-    pub sink: &'a mut dyn OutputBuffer,
-    state: InflateState
-}
-
-impl<'a> Inflate<'a> {
-    pub fn push(&mut self, data: &[u8]) -> Result<usize, InflateError> {
-        inflt(data, self.sink, &mut self.state)?;
-        let bytes = self.state.pos >> 3;
-        self.state.pos &= 7;
-        Ok(bytes)
-    }
-    pub fn end(&mut self) -> Result<(), InflateError> {
-        self.state.last = true;
-        self.push(&et)?;
-        Ok(())
-    }
-    pub fn new(sink: &'a mut dyn OutputBuffer) -> Inflate<'a> {
-        Inflate {
-            state: InflateState::new(),
-            sink: sink
-        }
-    }
-}
-
-#[cfg(feature = "std")]
-impl<'a> Write for Inflate<'a> {
-    #[inline(always)]
-    fn write(&mut self, data: &[u8]) -> Result<usize, Error> {
-        Ok(self.push(data)?)
-    }
-    #[inline(always)]
-    fn flush(&mut self) -> Result<(), Error> {
-        Ok(self.end()?)
-    }
-}
+// // pub struct Inflate<'a> {
+// //     pub sink: &'a mut dyn OutputBuffer,
+// //     state: InflateState
+// // }
+
+// // impl<'a> Inflate<'a> {
+// //     pub fn push(&mut self, data: &[u8]) -> Result<usize, InflateError> {
+// //         inflt(data, self.sink, &mut self.state)?;
+// //         let bytes = self.state.pos >> 3;
+// //         self.state.pos &= 7;
+// //         Ok(bytes)
+// //     }
+// //     pub fn end(&mut self) -> Result<(), InflateError> {
+// //         self.state.last = true;
+// //         self.push(&et)?;
+// //         Ok(())
+// //     }
+// //     pub fn new(sink: &'a mut dyn OutputBuffer) -> Inflate<'a> {
+// //         Inflate {
+// //             state: InflateState::new(),
+// //             sink: sink
+// //         }
+// //     }
+// // }
+
+// #[cfg(feature = "std")]
+// impl<'a> Write for Inflate<'a> {
+//     #[inline(always)]
+//     fn write(&mut self, data: &[u8]) -> Result<usize, Error> {
+//         Ok(self.push(data)?)
+//     }
+//     #[inline(always)]
+//     fn flush(&mut self) -> Result<(), Error> {
+//         Ok(self.end()?)
+//     }
+// }

+ 26 - 16
src/index.ts

@@ -240,7 +240,6 @@ const inflt = (dat: Uint8Array, buf?: Uint8Array, st?: InflateState) => {
         pos += hcLen * 3;
         // code lengths bits
         const clb = max(clt), clbmsk = (1 << clb) - 1;
-        if (!noSt && pos + tl * (clb + 7) > tbts) break;
         // code lengths map
         const clm = hMap(clt, clb, 1);
         for (let i = 0; i < tl;) {
@@ -270,25 +269,30 @@ const inflt = (dat: Uint8Array, buf?: Uint8Array, st?: InflateState) => {
         lm = hMap(lt, lbt, 1);
         dm = hMap(dt, dbt, 1);
       } else throw 'invalid block type';
-      if (pos > tbts) throw 'unexpected EOF';
+      if (pos > tbts) {
+        if (noSt) throw 'unexpected EOF';
+        break;
+      }
     }
     // Make sure the buffer can hold this + the largest possible addition
     // Maximum chunk size (practically, theoretically infinite) is 2^17;
     if (noBuf) cbuf(bt + 131072);
     const lms = (1 << lbt) - 1, dms = (1 << dbt) - 1;
-    const mxa = lbt + dbt + 18;
-    while (noSt || pos + mxa < tbts) {
+    let lpos = pos;
+    for (;; lpos = pos) {
       // bits read, code
       const c = lm[bits16(dat, pos) & lms], sym = c >>> 4;
       pos += c & 15;
-      if (pos > tbts) throw 'unexpected EOF';
+      if (pos > tbts) {
+        if (noSt) throw 'unexpected EOF';
+        break;
+      }
       if (!c) throw 'invalid length/literal';
       if (sym < 256) buf[bt++] = sym;
       else if (sym == 256) {
-        lm = null;
+        lpos = pos, lm = null;
         break;
-      }
-      else {
+      } else {
         let add = sym - 254;
         // no extra bits needed if less
         if (sym > 264) {
@@ -306,7 +310,10 @@ const inflt = (dat: Uint8Array, buf?: Uint8Array, st?: InflateState) => {
           const b = fdeb[dsym];
           dt += bits16(dat, pos) & ((1 << b) - 1), pos += b;
         }
-        if (pos > tbts) throw 'unexpected EOF';
+        if (pos > tbts) {
+          if (noSt) throw 'unexpected EOF';
+          break;
+        }
         if (noBuf) cbuf(bt + 131072);
         const end = bt + add;
         for (; bt < end; bt += 4) {
@@ -318,7 +325,7 @@ const inflt = (dat: Uint8Array, buf?: Uint8Array, st?: InflateState) => {
         bt = end;
       }
     }
-    st.l = lm, st.p = pos, st.b = bt;
+    st.l = lm, st.p = lpos, st.b = bt;
     if (lm) final = 1, st.m = lbt, st.d = dm, st.n = dbt;
   } while (!final)
   return bt == buf.length ? buf : slc(buf, 0, bt);
@@ -706,7 +713,7 @@ const adler = (): CRCV => {
     },
     d() {
       a %= 65521, b %= 65521;
-      return ((a >>> 8) << 16 | (b & 255) << 8 | (b >>> 8)) + ((a & 255) << 23) * 2;
+      return (a & 255) << 24 | (a >>> 8) << 16 | (b & 255) << 8 | (b >>> 8);
     }
   }
 }
@@ -983,9 +990,9 @@ const astrmify = <T>(fns: (() => unknown[])[], strm: Astrm, opts: T | 0, init: (
 const b2 = (d: Uint8Array, b: number) => d[b] | (d[b + 1] << 8);
 
 // read 4 bytes
-const b4 = (d: Uint8Array, b: number) => (d[b] | (d[b + 1] << 8) | (d[b + 2] << 16)) + (d[b + 3] << 23) * 2;
+const b4 = (d: Uint8Array, b: number) => (d[b] | (d[b + 1] << 8) | (d[b + 2] << 16) | (d[b + 3] << 24)) >>> 0;
 
-const b8 = (d: Uint8Array, b: number) => b4(d, b) | (b4(d, b) * 4294967296);
+const b8 = (d: Uint8Array, b: number) => b4(d, b) + (b4(d, b + 4) * 4294967296);
 
 // write bytes
 const wbytes = (d: Uint8Array, b: number, v: number) => {
@@ -1018,7 +1025,7 @@ const gzs = (d: Uint8Array) => {
 // gzip length
 const gzl = (d: Uint8Array) => {
   const l = d.length;
-  return (d[l - 4] | d[l - 3] << 8 | d[l - 2] << 16) + (2 * (d[l - 1] << 23));
+  return ((d[l - 4] | d[l - 3] << 8 | d[l - 2] << 16) | (d[l - 1] << 24)) >>> 0;
 }
 
 // gzip header length
@@ -2081,7 +2088,10 @@ export class DecodeUTF8 {
   push(chunk: Uint8Array, final?: boolean) {
     if (!this.ondata) throw 'no callback';
     if (!final) final = false;
-    if (this.t) return this.ondata(this.t.decode(chunk, { stream: !final }), final);
+    if (this.t) {
+      this.ondata(this.t.decode(chunk, { stream: true }), false);
+      if (final) this.ondata(this.t.decode(), true);
+    }
     const dat = new u8(this.p.length + chunk.length);
     dat.set(this.p);
     dat.set(chunk, this.p.length);
@@ -2227,7 +2237,7 @@ const wzh = (d: Uint8Array, b: number, f: ZHF, fn: Uint8Array, u: boolean, c?: n
   d[b++] = f.compression & 255, d[b++] = f.compression >> 8;
   const dt = new Date(f.mtime == null ? Date.now() : f.mtime), y = dt.getFullYear() - 1980;
   if (y < 0 || y > 119) throw 'date not in range 1980-2099';
-  wbytes(d, b, ((y << 24) * 2) | ((dt.getMonth() + 1) << 21) | (dt.getDate() << 16) | (dt.getHours() << 11) | (dt.getMinutes() << 5) | (dt.getSeconds() >>> 1)), b += 4;
+  wbytes(d, b, (y << 25) | ((dt.getMonth() + 1) << 21) | (dt.getDate() << 16) | (dt.getHours() << 11) | (dt.getMinutes() << 5) | (dt.getSeconds() >>> 1)), b += 4;
   if (c != null) {
     wbytes(d, b, f.crc);
     wbytes(d, b + 4, c);