소스 검색

1. Fix a potential infinite loop when composing deltas, caused by calculating the wrong UTF-16 code unit offset.
2. Add tests covering offset calculation for Chinese characters.

appflowy 3 년 전
부모
커밋
7e7254b306

+ 26 - 1
frontend/rust-lib/flowy-document/tests/document/document_test.rs

@@ -44,7 +44,19 @@ async fn document_sync_insert_test() {
 }
 
 #[tokio::test]
-async fn document_sync_delete_test1() {
+async fn document_sync_insert_in_chinese() {
+    let s = "好".to_owned();
+    let offset = count_utf16_code_units(&s);
+    let scripts = vec![
+        InsertText("你", 0),
+        InsertText("好", offset),
+        AssertJson(r#"[{"insert":"你好\n"}]"#),
+    ];
+    EditorTest::new().await.run_scripts(scripts).await;
+}
+
+#[tokio::test]
+async fn document_sync_delete_in_english() {
     let scripts = vec![
         InsertText("1", 0),
         InsertText("2", 1),
@@ -55,6 +67,19 @@ async fn document_sync_delete_test1() {
     EditorTest::new().await.run_scripts(scripts).await;
 }
 
+#[tokio::test]
+async fn document_sync_delete_in_chinese() {
+    let s = "好".to_owned();
+    let offset = count_utf16_code_units(&s);
+    let scripts = vec![
+        InsertText("你", 0),
+        InsertText("好", offset),
+        Delete(Interval::new(0, offset)),
+        AssertJson(r#"[{"insert":"好\n"}]"#),
+    ];
+    EditorTest::new().await.run_scripts(scripts).await;
+}
+
 #[tokio::test]
 async fn document_sync_replace_test() {
     let scripts = vec![

+ 1 - 0
frontend/rust-lib/flowy-document/tests/editor/attribute_test.rs

@@ -727,6 +727,7 @@ fn attributes_format_emoji() {
     let len = s.utf16_size();
     assert_eq!(3, len);
     assert_eq!(2, s.graphemes(true).count());
+    
     let ops = vec![
         Insert(0, emoji_s, 0),
         AssertDocJson(0, r#"[{"insert":"👋 \n"}]"#),

+ 51 - 35
frontend/rust-lib/flowy-document/tests/editor/op_test.rs

@@ -184,7 +184,7 @@ fn delta_get_ops_in_interval_7() {
 }
 
 #[test]
-fn delta_seek_1() {
+fn delta_op_seek() {
     let mut delta = RichTextDelta::default();
     let insert_a = OpBuilder::insert("12345").build();
     let retain_a = OpBuilder::retain(3).build();
@@ -196,66 +196,72 @@ fn delta_seek_1() {
 }
 
 #[test]
-fn delta_seek_2() {
+fn delta_utf16_code_unit_seek() {
     let mut delta = RichTextDelta::default();
     delta.add(OpBuilder::insert("12345").build());
 
     let mut iter = DeltaIter::new(&delta);
-    assert_eq!(iter.next_op_with_len(1).unwrap(), OpBuilder::insert("1").build());
+    iter.seek::<Utf16CodeUnitMetric>(3);
+    assert_eq!(iter.next_op_with_len(2).unwrap(), OpBuilder::insert("45").build());
 }
 
 #[test]
-fn delta_seek_3() {
+fn delta_utf16_code_unit_seek_with_attributes() {
     let mut delta = RichTextDelta::default();
-    delta.add(OpBuilder::insert("12345").build());
+    let attributes = AttributeBuilder::new()
+        .add_attr(RichTextAttribute::Bold(true))
+        .add_attr(RichTextAttribute::Italic(true))
+        .build();
+
+    delta.add(OpBuilder::insert("1234").attributes(attributes.clone()).build());
+    delta.add(OpBuilder::insert("\n").build());
 
     let mut iter = DeltaIter::new(&delta);
-    assert_eq!(iter.next_op_with_len(2).unwrap(), OpBuilder::insert("12").build());
+    iter.seek::<Utf16CodeUnitMetric>(0);
 
-    assert_eq!(iter.next_op_with_len(2).unwrap(), OpBuilder::insert("34").build());
+    assert_eq!(
+        iter.next_op_with_len(4).unwrap(),
+        OpBuilder::insert("1234").attributes(attributes).build(),
+    );
+}
 
+#[test]
+fn delta_next_op_len() {
+    let mut delta = RichTextDelta::default();
+    delta.add(OpBuilder::insert("12345").build());
+    let mut iter = DeltaIter::new(&delta);
+    assert_eq!(iter.next_op_with_len(2).unwrap(), OpBuilder::insert("12").build());
+    assert_eq!(iter.next_op_with_len(2).unwrap(), OpBuilder::insert("34").build());
     assert_eq!(iter.next_op_with_len(2).unwrap(), OpBuilder::insert("5").build());
-
     assert_eq!(iter.next_op_with_len(1), None);
 }
 
 #[test]
-fn delta_seek_4() {
+fn delta_next_op_len_with_chinese() {
     let mut delta = RichTextDelta::default();
-    delta.add(OpBuilder::insert("12345").build());
+    delta.add(OpBuilder::insert("你好").build());
 
     let mut iter = DeltaIter::new(&delta);
-    iter.seek::<CharMetric>(3);
-    assert_eq!(iter.next_op_with_len(2).unwrap(), OpBuilder::insert("45").build());
+    assert_eq!(iter.next_op_len().unwrap(), 2);
+    assert_eq!(iter.next_op_with_len(2).unwrap(), OpBuilder::insert("你好").build());
 }
 
 #[test]
-fn delta_seek_5() {
+fn delta_next_op_len_with_english() {
     let mut delta = RichTextDelta::default();
-    let attributes = AttributeBuilder::new()
-        .add_attr(RichTextAttribute::Bold(true))
-        .add_attr(RichTextAttribute::Italic(true))
-        .build();
-
-    delta.add(OpBuilder::insert("1234").attributes(attributes.clone()).build());
-    delta.add(OpBuilder::insert("\n").build());
-
+    delta.add(OpBuilder::insert("ab").build());
     let mut iter = DeltaIter::new(&delta);
-    iter.seek::<CharMetric>(0);
-
-    assert_eq!(
-        iter.next_op_with_len(4).unwrap(),
-        OpBuilder::insert("1234").attributes(attributes).build(),
-    );
+    assert_eq!(iter.next_op_len().unwrap(), 2);
+    assert_eq!(iter.next_op_with_len(2).unwrap(), OpBuilder::insert("ab").build());
 }
 
 #[test]
-fn delta_next_op_len_test() {
+fn delta_next_op_len_after_seek() {
     let mut delta = RichTextDelta::default();
     delta.add(OpBuilder::insert("12345").build());
-
     let mut iter = DeltaIter::new(&delta);
-    iter.seek::<CharMetric>(3);
+    assert_eq!(iter.next_op_len().unwrap(), 5);
+    iter.seek::<Utf16CodeUnitMetric>(3);
     assert_eq!(iter.next_op_len().unwrap(), 2);
     assert_eq!(iter.next_op_with_len(1).unwrap(), OpBuilder::insert("4").build());
     assert_eq!(iter.next_op_len().unwrap(), 1);
@@ -263,7 +269,7 @@ fn delta_next_op_len_test() {
 }
 
 #[test]
-fn delta_next_op_len_test2() {
+fn delta_next_op_len_none() {
     let mut delta = RichTextDelta::default();
     delta.add(OpBuilder::insert("12345").build());
     let mut iter = DeltaIter::new(&delta);
@@ -290,7 +296,7 @@ fn delta_next_op_with_len_cross_op_return_last() {
     delta.add(OpBuilder::insert("678").build());
 
     let mut iter = DeltaIter::new(&delta);
-    iter.seek::<CharMetric>(4);
+    iter.seek::<Utf16CodeUnitMetric>(4);
     assert_eq!(iter.next_op_len().unwrap(), 1);
     assert_eq!(iter.next_op_with_len(2).unwrap(), OpBuilder::retain(1).build());
 }
@@ -475,7 +481,7 @@ fn transform_random_delta() {
 }
 
 #[test]
-fn transform_with_two_delta_test() {
+fn transform_with_two_delta() {
     let mut a = RichTextDelta::default();
     let mut a_s = String::new();
     a.insert(
@@ -515,7 +521,7 @@ fn transform_with_two_delta_test() {
 }
 
 #[test]
-fn transform_two_plain_delta_test() {
+fn transform_two_plain_delta() {
     let ops = vec![
         Insert(0, "123", 0),
         Insert(1, "456", 0),
@@ -527,7 +533,7 @@ fn transform_two_plain_delta_test() {
 }
 
 #[test]
-fn transform_two_plain_delta_test2() {
+fn transform_two_plain_delta2() {
     let ops = vec![
         Insert(0, "123", 0),
         Insert(1, "456", 0),
@@ -721,6 +727,16 @@ fn delta_invert_attribute_delta_with_attribute_delta() {
     TestBuilder::new().run_scripts::<PlainDoc>(ops);
 }
 
+#[test]
+fn delta_compose_str() {
+    let ops = vec![
+        Insert(0, "1", 0),
+        Insert(0, "2", 1),
+        AssertDocJson(0, r#"[{"insert":"12\n"}]"#),
+    ];
+    TestBuilder::new().run_scripts::<NewlineDoc>(ops);
+}
+
 #[test]
 #[should_panic]
 fn delta_compose_with_missing_delta() {

+ 0 - 1
shared-lib/flowy-collaboration/src/document/document.rs

@@ -112,7 +112,6 @@ impl Document {
         let text = data.to_string();
         let interval = Interval::new(index, index);
         let _ = validate_interval(&self.delta, &interval)?;
-
         let delta = self.view.insert(&self.delta, &text, interval)?;
         self.compose_delta(delta.clone())?;
         Ok(delta)

+ 2 - 2
shared-lib/flowy-collaboration/src/document/extensions/delete/preserve_line_format_merge.rs

@@ -1,6 +1,6 @@
 use crate::{document::DeleteExt, util::is_newline};
 use lib_ot::{
-    core::{Attributes, CharMetric, DeltaBuilder, DeltaIter, Interval, NEW_LINE},
+    core::{Attributes, DeltaBuilder, DeltaIter, Interval, Utf16CodeUnitMetric, NEW_LINE},
     rich_text::{plain_attributes, RichTextDelta},
 };
 
@@ -22,7 +22,7 @@ impl DeleteExt for PreserveLineFormatOnMerge {
             return None;
         }
 
-        iter.seek::<CharMetric>(interval.size() - 1);
+        iter.seek::<Utf16CodeUnitMetric>(interval.size() - 1);
         let mut new_delta = DeltaBuilder::new()
             .retain(interval.start)
             .delete(interval.size())

+ 2 - 2
shared-lib/flowy-collaboration/src/document/extensions/insert/reset_format_on_new_line.rs

@@ -1,6 +1,6 @@
 use crate::{document::InsertExt, util::is_newline};
 use lib_ot::{
-    core::{CharMetric, DeltaBuilder, DeltaIter, NEW_LINE},
+    core::{DeltaBuilder, DeltaIter, Utf16CodeUnitMetric, NEW_LINE},
     rich_text::{RichTextAttributeKey, RichTextAttributes, RichTextDelta},
 };
 
@@ -14,7 +14,7 @@ impl InsertExt for ResetLineFormatOnNewLine {
         }
 
         let mut iter = DeltaIter::new(delta);
-        iter.seek::<CharMetric>(index);
+        iter.seek::<Utf16CodeUnitMetric>(index);
         let next_op = iter.next_op()?;
         if !next_op.get_data().starts_with(NEW_LINE) {
             return None;

+ 1 - 1
shared-lib/flowy-collaboration/src/document/view.rs

@@ -32,7 +32,7 @@ impl View {
         for ext in &self.insert_exts {
             if let Some(mut delta) = ext.apply(delta, interval.size(), text, interval.start) {
                 trim(&mut delta);
-                tracing::debug!("[{}]: applied, delta: {}", ext.ext_name(), delta);
+                tracing::debug!("[{}]: process delta: {}", ext.ext_name(), delta);
                 new_delta = Some(delta);
                 break;
             }

+ 22 - 20
shared-lib/lib-ot/src/core/delta/cursor.rs

@@ -35,13 +35,13 @@ where
     }
 
     // get the next operation interval
-    pub fn next_iv(&self) -> Interval { self.next_iv_before(None).unwrap_or_else(|| Interval::new(0, 0)) }
+    pub fn next_iv(&self) -> Interval { self.next_iv_with_len(None).unwrap_or_else(|| Interval::new(0, 0)) }
 
     pub fn next_op(&mut self) -> Option<Operation<T>> { self.next_with_len(None) }
 
     // get the last operation before the end.
     // checkout the delta_next_op_with_len_cross_op_return_last test for more detail
-    pub fn next_with_len(&mut self, force_end: Option<usize>) -> Option<Operation<T>> {
+    pub fn next_with_len(&mut self, expected_len: Option<usize>) -> Option<Operation<T>> {
         let mut find_op = None;
         let holder = self.next_op.clone();
         let mut next_op = holder.as_ref();
@@ -53,7 +53,9 @@ where
         let mut consume_len = 0;
         while find_op.is_none() && next_op.is_some() {
             let op = next_op.take().unwrap();
-            let interval = self.next_iv_before(force_end).unwrap_or_else(|| Interval::new(0, 0));
+            let interval = self
+                .next_iv_with_len(expected_len)
+                .unwrap_or_else(|| Interval::new(0, 0));
 
             // cache the op if the interval is empty. e.g. last_op_before(Some(0))
             if interval.is_empty() {
@@ -79,7 +81,7 @@ where
         }
 
         if find_op.is_some() {
-            if let Some(end) = force_end {
+            if let Some(end) = expected_len {
                 // try to find the next op before the index if consume_len less than index
                 if end > consume_len && self.has_next() {
                     return self.next_with_len(Some(end - consume_len));
@@ -111,12 +113,12 @@ where
         }
     }
 
-    fn next_iv_before(&self, force_end: Option<usize>) -> Option<Interval> {
+    fn next_iv_with_len(&self, expected_len: Option<usize>) -> Option<Interval> {
         let op = self.next_iter_op()?;
         let start = self.consume_count;
-        let end = match force_end {
+        let end = match expected_len {
             None => self.consume_count + op.len(),
-            Some(index) => self.consume_count + min(index, op.len()),
+            Some(expected_len) => self.consume_count + min(expected_len, op.len()),
         };
 
         let intersect = Interval::new(start, end).intersect(self.consume_iv);
@@ -155,34 +157,34 @@ where
 
 type SeekResult = Result<(), OTError>;
 pub trait Metric {
-    fn seek<T: Attributes>(cursor: &mut OpCursor<T>, index: usize) -> SeekResult;
+    fn seek<T: Attributes>(cursor: &mut OpCursor<T>, offset: usize) -> SeekResult;
 }
 
 pub struct OpMetric();
 
 impl Metric for OpMetric {
-    fn seek<T: Attributes>(cursor: &mut OpCursor<T>, index: usize) -> SeekResult {
-        let _ = check_bound(cursor.op_index, index)?;
+    fn seek<T: Attributes>(cursor: &mut OpCursor<T>, offset: usize) -> SeekResult {
+        let _ = check_bound(cursor.op_index, offset)?;
         let mut seek_cursor = OpCursor::new(cursor.delta, cursor.origin_iv);
-        let mut offset = 0;
+        let mut cur_offset = 0;
         while let Some((_, op)) = seek_cursor.iter.next() {
-            offset += op.len();
-            if offset > index {
+            cur_offset += op.len();
+            if cur_offset > offset {
                 break;
             }
         }
-        cursor.descend(offset);
+        cursor.descend(cur_offset);
         Ok(())
     }
 }
 
-pub struct CharMetric();
+pub struct Utf16CodeUnitMetric();
 
-impl Metric for CharMetric {
-    fn seek<T: Attributes>(cursor: &mut OpCursor<T>, index: usize) -> SeekResult {
-        if index > 0 {
-            let _ = check_bound(cursor.consume_count, index)?;
-            let _ = cursor.next_with_len(Some(index));
+impl Metric for Utf16CodeUnitMetric {
+    fn seek<T: Attributes>(cursor: &mut OpCursor<T>, offset: usize) -> SeekResult {
+        if offset > 0 {
+            let _ = check_bound(cursor.consume_count, offset)?;
+            let _ = cursor.next_with_len(Some(offset));
         }
 
         Ok(())

+ 3 - 3
shared-lib/lib-ot/src/core/delta/delta.rs

@@ -187,7 +187,7 @@ where
                     }
                 },
                 Operation::Insert(insert) => {
-                    inverted.delete(insert.count_of_utf16_code_units());
+                    inverted.delete(insert.utf16_size());
                 },
                 Operation::Delete(delete) => {
                     inverted.insert(&chars.take(*delete as usize).collect::<String>(), op.get_attributes());
@@ -294,12 +294,12 @@ where
                 (Some(Operation::Insert(insert)), _) => {
                     // let composed_attrs = transform_attributes(&next_op1, &next_op2, true);
                     a_prime.insert(&insert.s, insert.attributes.clone());
-                    b_prime.retain(insert.count_of_utf16_code_units(), insert.attributes.clone());
+                    b_prime.retain(insert.utf16_size(), insert.attributes.clone());
                     next_op1 = ops1.next();
                 },
                 (_, Some(Operation::Insert(o_insert))) => {
                     let composed_attrs = transform_op_attribute(&next_op1, &next_op2)?;
-                    a_prime.retain(o_insert.count_of_utf16_code_units(), composed_attrs.clone());
+                    a_prime.retain(o_insert.utf16_size(), composed_attrs.clone());
                     b_prime.insert(&o_insert.s, composed_attrs);
                     next_op2 = ops2.next();
                 },

+ 2 - 2
shared-lib/lib-ot/src/core/delta/iterator.rs

@@ -23,7 +23,7 @@ where
     pub fn from_offset(delta: &'a Delta<T>, offset: usize) -> Self {
         let interval = Interval::new(0, MAX_IV_LEN);
         let mut iter = Self::from_interval(delta, interval);
-        iter.seek::<CharMetric>(offset);
+        iter.seek::<Utf16CodeUnitMetric>(offset);
         iter
     }
 
@@ -181,7 +181,7 @@ where
             Operation::<T>::Insert(insert) => {
                 tracing::trace!("extend insert attributes with {} ", &insert.attributes);
                 attributes.extend_other(insert.attributes.clone());
-                length = insert.count_of_utf16_code_units();
+                length = insert.utf16_size();
             },
         }
 

+ 109 - 95
shared-lib/lib-ot/src/core/flowy_str.rs

@@ -1,5 +1,5 @@
 use serde::{de, de::Visitor, Deserialize, Deserializer, Serialize, Serializer};
-use std::{fmt, fmt::Formatter, slice};
+use std::{fmt, fmt::Formatter};
 
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub struct FlowyStr(pub String);
@@ -10,18 +10,11 @@ impl FlowyStr {
 
     pub fn utf16_code_unit_iter(&self) -> Utf16CodeUnitIterator { Utf16CodeUnitIterator::new(self) }
 
-    pub fn sub_str(&self, interval: Interval) -> String {
-        match self.with_interval(interval) {
-            None => "".to_owned(),
-            Some(s) => s.0,
-        }
-    }
-
-    pub fn with_interval(&self, interval: Interval) -> Option<FlowyStr> {
+    pub fn sub_str(&self, interval: Interval) -> Option<String> {
         let mut iter = Utf16CodeUnitIterator::new(self);
         let mut buf = vec![];
         while let Some((byte, _len)) = iter.next() {
-            if interval.start < iter.code_unit_offset && interval.end >= iter.code_unit_offset {
+            if iter.utf16_offset >= interval.start && iter.utf16_offset < interval.end {
                 buf.extend_from_slice(byte);
             }
         }
@@ -31,7 +24,7 @@ impl FlowyStr {
         }
 
         match str::from_utf8(&buf) {
-            Ok(item) => Some(item.into()),
+            Ok(item) => Some(item.to_owned()),
             Err(_e) => None,
         }
     }
@@ -40,69 +33,6 @@ impl FlowyStr {
     fn utf16_code_point_iter(&self) -> FlowyUtf16CodePointIterator { FlowyUtf16CodePointIterator::new(self, 0) }
 }
 
-pub struct Utf16CodeUnitIterator<'a> {
-    s: &'a FlowyStr,
-    bytes_offset: usize,
-    code_unit_offset: usize,
-    iter_index: usize,
-    iter: slice::Iter<'a, u8>,
-}
-
-impl<'a> Utf16CodeUnitIterator<'a> {
-    pub fn new(s: &'a FlowyStr) -> Self {
-        Utf16CodeUnitIterator {
-            s,
-            bytes_offset: 0,
-            code_unit_offset: 0,
-            iter_index: 0,
-            iter: s.as_bytes().iter(),
-        }
-    }
-}
-
-impl<'a> Iterator for Utf16CodeUnitIterator<'a> {
-    type Item = (&'a [u8], usize);
-
-    fn next(&mut self) -> Option<Self::Item> {
-        let start = self.bytes_offset;
-        let _end = start;
-
-        while let Some(&b) = self.iter.next() {
-            self.iter_index += 1;
-
-            let mut code_unit_count = 0;
-            if self.bytes_offset > self.iter_index {
-                continue;
-            }
-
-            if self.bytes_offset == self.iter_index {
-                break;
-            }
-
-            if (b as i8) >= -0x40 {
-                code_unit_count += 1
-            }
-            if b >= 0xf0 {
-                code_unit_count += 1
-            }
-
-            self.bytes_offset += len_utf8_from_first_byte(b);
-            self.code_unit_offset += code_unit_count;
-
-            if code_unit_count == 1 {
-                break;
-            }
-        }
-
-        if start == self.bytes_offset {
-            return None;
-        }
-
-        let byte = &self.s.as_bytes()[start..self.bytes_offset];
-        Some((byte, self.bytes_offset - start))
-    }
-}
-
 impl std::ops::Deref for FlowyStr {
     type Target = String;
 
@@ -170,6 +100,52 @@ impl<'de> Deserialize<'de> for FlowyStr {
     }
 }
 
+pub struct Utf16CodeUnitIterator<'a> {
+    s: &'a FlowyStr,
+    byte_offset: usize,
+    utf16_offset: usize,
+    utf16_count: usize,
+}
+
+impl<'a> Utf16CodeUnitIterator<'a> {
+    pub fn new(s: &'a FlowyStr) -> Self {
+        Utf16CodeUnitIterator {
+            s,
+            byte_offset: 0,
+            utf16_offset: 0,
+            utf16_count: 0,
+        }
+    }
+}
+
+impl<'a> Iterator for Utf16CodeUnitIterator<'a> {
+    type Item = (&'a [u8], usize);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let _len = self.s.len();
+        if self.byte_offset == self.s.len() {
+            None
+        } else {
+            let b = self.s.as_bytes()[self.byte_offset];
+            let start = self.byte_offset;
+            let end = self.byte_offset + len_utf8_from_first_byte(b);
+            if (b as i8) >= -0x40 {
+                self.utf16_count += 1;
+            }
+            if b >= 0xf0 {
+                self.utf16_count += 1;
+            }
+
+            if self.utf16_count > 0 {
+                self.utf16_offset = self.utf16_count - 1;
+            }
+            self.byte_offset = end;
+            let byte = &self.s.as_bytes()[start..end];
+            Some((byte, end - start))
+        }
+    }
+}
+
 pub struct FlowyUtf16CodePointIterator<'a> {
     s: &'a FlowyStr,
     offset: usize,
@@ -230,38 +206,76 @@ pub fn len_utf8_from_first_byte(b: u8) -> usize {
 mod tests {
     use crate::core::{FlowyStr, Interval};
 
+    #[test]
+    fn flowy_str_code_unit() {
+        let size = FlowyStr::from("👋").utf16_size();
+        assert_eq!(size, 2);
+
+        let s: FlowyStr = "👋 \n👋".into();
+        let output = s.sub_str(Interval::new(0, size)).unwrap();
+        assert_eq!(output, "👋");
+
+        let output = s.sub_str(Interval::new(2, 3)).unwrap();
+        assert_eq!(output, " ");
+
+        let output = s.sub_str(Interval::new(3, 4)).unwrap();
+        assert_eq!(output, "\n");
+
+        let output = s.sub_str(Interval::new(4, 4 + size)).unwrap();
+        assert_eq!(output, "👋");
+    }
+
+    #[test]
+    fn flowy_str_sub_str_in_chinese() {
+        let s: FlowyStr = "你好\n😁".into();
+        let size = s.utf16_size();
+        assert_eq!(size, 5);
+
+        let output1 = s.sub_str(Interval::new(0, 2)).unwrap();
+        let output2 = s.sub_str(Interval::new(2, 3)).unwrap();
+        let output3 = s.sub_str(Interval::new(3, 5)).unwrap();
+        assert_eq!(output1, "你好");
+        assert_eq!(output2, "\n");
+        assert_eq!(output3, "😁");
+    }
+
+    #[test]
+    fn flowy_str_sub_str_in_chinese2() {
+        let s: FlowyStr = "😁 \n".into();
+        let size = s.utf16_size();
+        assert_eq!(size, 4);
+
+        let output1 = s.sub_str(Interval::new(0, 3)).unwrap();
+        let output2 = s.sub_str(Interval::new(3, 4)).unwrap();
+        assert_eq!(output1, "😁 ");
+        assert_eq!(output2, "\n");
+    }
+
+    #[test]
+    fn flowy_str_sub_str_in_english() {
+        let s: FlowyStr = "ab".into();
+        let size = s.utf16_size();
+        assert_eq!(size, 2);
+
+        let output = s.sub_str(Interval::new(0, 2)).unwrap();
+        assert_eq!(output, "ab");
+    }
+
     #[test]
     fn flowy_str_utf16_code_point_iter_test1() {
-        let s: FlowyStr = "👋😁👋😁".into();
+        let s: FlowyStr = "👋😁👋".into();
         let mut iter = s.utf16_code_point_iter();
         assert_eq!(iter.next().unwrap(), "👋".to_string());
         assert_eq!(iter.next().unwrap(), "😁".to_string());
         assert_eq!(iter.next().unwrap(), "👋".to_string());
-        assert_eq!(iter.next().unwrap(), "😁".to_string());
         assert_eq!(iter.next(), None);
     }
 
     #[test]
     fn flowy_str_utf16_code_point_iter_test2() {
-        let s: FlowyStr = "👋👋😁😁👋👋".into();
+        let s: FlowyStr = "👋😁👋".into();
         let iter = s.utf16_code_point_iter();
-        let result = iter.skip(2).take(2).collect::<String>();
-        assert_eq!(result, "😁😁".to_string());
-    }
-
-    #[test]
-    fn flowy_str_code_unit_test() {
-        let s: FlowyStr = "👋 \n👋".into();
-        let output = s.with_interval(Interval::new(0, 2)).unwrap().0;
-        assert_eq!(output, "👋");
-
-        let output = s.with_interval(Interval::new(2, 3)).unwrap().0;
-        assert_eq!(output, " ");
-
-        let output = s.with_interval(Interval::new(3, 4)).unwrap().0;
-        assert_eq!(output, "\n");
-
-        let output = s.with_interval(Interval::new(4, 6)).unwrap().0;
-        assert_eq!(output, "👋");
+        let result = iter.skip(1).take(1).collect::<String>();
+        assert_eq!(result, "😁".to_string());
     }
 }

+ 5 - 12
shared-lib/lib-ot/src/core/operation/operation.rs

@@ -67,7 +67,7 @@ where
         match self {
             Operation::Delete(n) => *n,
             Operation::Retain(r) => r.n,
-            Operation::Insert(i) => i.count_of_utf16_code_units(),
+            Operation::Insert(i) => i.utf16_size(),
         }
     }
 
@@ -95,7 +95,7 @@ where
                         .build(),
                 );
                 right = Some(
-                    OpBuilder::<T>::insert(&insert.s[index..insert.count_of_utf16_code_units()])
+                    OpBuilder::<T>::insert(&insert.s[index..insert.utf16_size()])
                         .attributes(attributes)
                         .build(),
                 );
@@ -112,17 +112,10 @@ where
                 .attributes(retain.attributes.clone())
                 .build(),
             Operation::Insert(insert) => {
-                if interval.start > insert.count_of_utf16_code_units() {
+                if interval.start > insert.utf16_size() {
                     OpBuilder::insert("").build()
                 } else {
-                    // let s = &insert
-                    //     .s
-                    //     .chars()
-                    //     .skip(interval.start)
-                    //     .take(min(interval.size(), insert.count_of_code_units()))
-                    //     .collect::<String>();
-
-                    let s = insert.s.sub_str(interval);
+                    let s = insert.s.sub_str(interval).unwrap_or_else(|| "".to_owned());
                     OpBuilder::insert(&s).attributes(insert.attributes.clone()).build()
                 }
             },
@@ -291,7 +284,7 @@ impl<T> Insert<T>
 where
     T: Attributes,
 {
-    pub fn count_of_utf16_code_units(&self) -> usize { self.s.utf16_size() }
+    pub fn utf16_size(&self) -> usize { self.s.utf16_size() }
 
     pub fn merge_or_new_op(&mut self, s: &str, attributes: T) -> Option<Operation<T>> {
         if self.attributes == attributes {