浏览代码

[rust]: fix code point issue

appflowy 3 年之前
父节点
当前提交
5cf47e9f55

+ 1 - 1
app_flowy/lib/workspace/application/doc/doc_bloc.dart

@@ -123,7 +123,7 @@ class DocBloc extends Bloc<DocEvent, DocState> {
 
   Document _decodeJsonToDocument(String data) {
     // String d = r'''
-    //     [{"insert":"\n👋 Welcome to AppFlowy!\n"},{"insert":"\n","attributes":{"header":1}},{"insert":"Here are the basics\n"},{"insert":"lick anywhere and just start typing\n"},{"insert":"H","attributes":{"list":"unchecked"}},{"insert":"ighlight any text, and use the menu at the bottom to style your writing however you like\n"},{"insert":"C","attributes":{"list":"unchecked"}},{"insert":"lick + New Page button at the bottom of your sidebar to add a new page\n"},{"insert":"C","attributes":{"list":"unchecked"}},{"insert":"lick the +  next to any page title in the sidebar to quickly add a new subpage\n"},{"insert":"\n","attributes":{"list":"unchecked"}},{"insert":"Have a question? \n"},{"insert":"lick the '?' at the bottom right for help and support.\n\nLike AppFlowy? Follow us:\n"},{"insert":"G","attributes":{"header":2}},{"insert":"ithub: https://github.com/AppFlowy-IO/appflowy\n"},{"insert":"T","attributes":{"blockquote":true}},{"insert":"witter: https://twitter.com/appflowy\n"},{"insert":"N","attributes":{"blockquote":true}},{"insert":"ewsletter: https://www.appflowy.io/blog\n"},{"retain":1,"attributes":{"blockquote":true}},{"insert":"\n"}]
+    //     "[{"insert":"\n👋 Welcome to AppFlowy!\n"},{"insert":"\n","attributes":{"header":1}},{"insert":"Here are the basics\n"},{"insert":"C","attributes":{"header":2}},{"insert":"lick anywhere and just start typing\n"},{"insert":"H","attributes":{"list":"unchecked"}},{"insert":"ighlight any text, and use the menu at the bottom to style your writing however you like\n"},{"insert":"C","attributes":{"list":"unchecked"}},{"insert":"lick + New Page button at the bottom of your sidebar to add a new page\n"},{"insert":"C","attributes":{"list":"unchecked"}},{"insert":"lick the +  next to any page title in the sidebar to quickly add a new subpage\n"},{"insert":"\n","attributes":{"list":"unchecked"}},{"insert":"Have a question? \n"},{"insert":"C","attributes":{"header":2}},{"insert":"lick the '?' at the bottom right for help and support.\n\nLike AppFlowy? Follow us:\n"},{"insert":"G","attributes":{"header":2}},{"insert":"ithub: https://github.com/AppFlowy-IO/appflowy\n"},{"insert":"T","attributes":{"blockquote":true}},{"insert":"witter: https://twitter.com/appflowy\n"},{"insert":"N","attributes":{"blockquote":true}},{"insert":"ewsletter: https://www.appflowy.io/blog\n"},{"retain":1,"attributes":{"blockquote":true}},{"insert":"\n"}]"
     //     ''';
 
     final json = jsonDecode(data);

+ 1 - 1
backend/src/service/doc/edit/edit_doc.rs

@@ -181,7 +181,7 @@ impl ServerEditDoc {
                 log::error!("Failed to acquire write lock of document");
             },
             Some(mut write_guard) => {
-                let _ = write_guard.compose_delta(&delta).map_err(internal_error)?;
+                let _ = write_guard.compose_delta(delta).map_err(internal_error)?;
                 tracing::Span::current().record("result", &write_guard.to_json().as_str());
             },
         }

+ 1 - 1
backend/tests/api/workspace.rs

@@ -58,7 +58,7 @@ async fn workspace_update() {
 async fn workspace_delete() {
     let test = WorkspaceTest::new().await;
     let delete_params = WorkspaceIdentifier {
-        workspace_id: test.workspace.id.clone(),
+        workspace_id: Some(test.workspace.id.clone()),
     };
 
     let _ = test.server.delete_workspace(delete_params).await;

+ 1 - 1
rust-lib/flowy-document/Cargo.toml

@@ -21,7 +21,7 @@ flowy-net = { path = "../flowy-net", features = ["flowy_request"] }
 diesel = {version = "1.4.7", features = ["sqlite"]}
 diesel_derives = {version = "1.4.1", features = ["sqlite"]}
 protobuf = {version = "2.18.0"}
-unicode-segmentation = "1.7.1"
+unicode-segmentation = "1.8"
 lazy_static = "1.4.0"
 log = "0.4.14"
 tokio = {version = "1", features = ["sync"]}

+ 18 - 6
rust-lib/flowy-document/src/services/doc/document/document.rs

@@ -77,9 +77,10 @@ impl Document {
         }
     }
 
-    pub fn compose_delta(&mut self, delta: &Delta) -> Result<(), DocError> {
+    pub fn compose_delta(&mut self, mut delta: Delta) -> Result<(), DocError> {
+        trim(&mut delta);
         tracing::trace!("{} compose {}", &self.delta.to_json(), delta.to_json());
-        let composed_delta = self.delta.compose(delta)?;
+        let mut composed_delta = self.delta.compose(&delta)?;
         let mut undo_delta = delta.invert(&self.delta);
 
         let now = chrono::Utc::now().timestamp_millis() as usize;
@@ -100,6 +101,8 @@ impl Document {
         }
 
         tracing::trace!("compose result: {}", composed_delta.to_json());
+        trim(&mut composed_delta);
+
         self.set_delta(composed_delta);
         Ok(())
     }
@@ -111,7 +114,7 @@ impl Document {
         let text = data.to_string();
         let delta = self.view.insert(&self.delta, &text, interval)?;
         tracing::trace!("👉 receive change: {}", delta);
-        self.compose_delta(&delta)?;
+        self.compose_delta(delta.clone())?;
         Ok(delta)
     }
 
@@ -121,7 +124,7 @@ impl Document {
         let delete = self.view.delete(&self.delta, interval)?;
         if !delete.is_empty() {
             tracing::trace!("👉 receive change: {}", delete);
-            let _ = self.compose_delta(&delete)?;
+            let _ = self.compose_delta(delete.clone())?;
         }
         Ok(delete)
     }
@@ -132,7 +135,7 @@ impl Document {
         let format_delta = self.view.format(&self.delta, attribute.clone(), interval).unwrap();
 
         tracing::trace!("👉 receive change: {}", format_delta);
-        self.compose_delta(&format_delta)?;
+        self.compose_delta(format_delta.clone())?;
         Ok(format_delta)
     }
 
@@ -143,7 +146,7 @@ impl Document {
         if !text.is_empty() {
             delta = self.view.insert(&self.delta, &text, interval)?;
             tracing::trace!("👉 receive change: {}", delta);
-            self.compose_delta(&delta)?;
+            self.compose_delta(delta.clone())?;
         }
 
         if !interval.is_empty() {
@@ -206,3 +209,12 @@ fn validate_interval(delta: &Delta, interval: &Interval) -> Result<(), DocError>
     }
     Ok(())
 }
+
+/// Pops the last operation of `delta` when that operation is a retain for
+/// which `is_plain()` holds (a retain with empty attributes). Any other
+/// delta, including one without operations, is left untouched.
+pub fn trim(delta: &mut Delta) {
+    let drop_last = delta
+        .ops
+        .last()
+        .map_or(false, |op| op.is_retain() && op.is_plain());
+    if drop_last {
+        delta.ops.pop();
+    }
+}

+ 2 - 5
rust-lib/flowy-document/src/services/doc/edit/doc_actor.rs

@@ -104,10 +104,6 @@ impl DocumentActor {
                 let data = self.document.read().await.to_json();
                 let _ = ret.send(Ok(data));
             },
-            DocumentMsg::SaveDocument { rev_id: _, ret } => {
-                // let result = self.save_to_disk(rev_id).await;
-                let _ = ret.send(Ok(()));
-            },
         }
         Ok(())
     }
@@ -116,11 +112,12 @@ impl DocumentActor {
     async fn composed_delta(&self, delta: Delta) -> DocResult<()> {
         // tracing::debug!("{:?} thread handle_message", thread::current(),);
         let mut document = self.document.write().await;
-        let result = document.compose_delta(&delta);
         tracing::Span::current().record(
             "composed_delta",
             &format!("doc_id:{} - {}", &self.doc_id, delta.to_json()).as_str(),
         );
+
+        let result = document.compose_delta(delta);
         drop(document);
 
         result

+ 4 - 13
rust-lib/flowy-document/src/services/doc/edit/edit_doc.rs

@@ -72,8 +72,8 @@ impl ClientEditDoc {
         };
         let _ = self.document.send(msg);
         let delta = rx.await.map_err(internal_error)??;
-        let rev_id = self.save_local_delta(delta).await?;
-        save_document(self.document.clone(), rev_id.into()).await
+        let _ = self.save_local_delta(delta).await?;
+        Ok(())
     }
 
     pub async fn delete(&self, interval: Interval) -> Result<(), DocError> {
@@ -171,8 +171,8 @@ impl ClientEditDoc {
         let _ = self.document.send(msg);
         let _ = rx.await.map_err(internal_error)??;
 
-        let rev_id = self.save_local_delta(delta).await?;
-        save_document(self.document.clone(), rev_id).await
+        let _ = self.save_local_delta(delta).await?;
+        Ok(())
     }
 
     #[cfg(feature = "flowy_test")]
@@ -249,8 +249,6 @@ impl ClientEditDoc {
             RevType::Remote,
         );
         let _ = self.ws.send(revision.into());
-
-        let _ = save_document(self.document.clone(), local_rev_id.into()).await?;
         Ok(())
     }
 
@@ -311,13 +309,6 @@ fn spawn_rev_receiver(mut receiver: mpsc::UnboundedReceiver<Revision>, ws: Arc<d
     });
 }
 
-async fn save_document(document: UnboundedSender<DocumentMsg>, rev_id: RevId) -> DocResult<()> {
-    let (ret, rx) = oneshot::channel::<DocResult<()>>();
-    let _ = document.send(DocumentMsg::SaveDocument { rev_id, ret });
-    let result = rx.await.map_err(internal_error)?;
-    result
-}
-
 fn spawn_doc_edit_actor(doc_id: &str, delta: Delta, _pool: Arc<ConnectionPool>) -> UnboundedSender<DocumentMsg> {
     let (sender, receiver) = mpsc::unbounded_channel::<DocumentMsg>();
     let actor = DocumentActor::new(doc_id, delta, receiver);

+ 0 - 4
rust-lib/flowy-document/src/services/doc/edit/message.rs

@@ -50,10 +50,6 @@ pub enum DocumentMsg {
     Doc {
         ret: Ret<String>,
     },
-    SaveDocument {
-        rev_id: RevId,
-        ret: Ret<()>,
-    },
 }
 
 pub struct TransformDeltas {

+ 2 - 4
rust-lib/flowy-document/src/services/doc/extensions/insert/auto_format.rs

@@ -1,9 +1,7 @@
 use std::cmp::min;
-
-use bytecount::num_chars;
 use url::Url;
 
-use flowy_ot::core::{plain_attributes, Attribute, Attributes, Delta, DeltaBuilder, DeltaIter};
+use flowy_ot::core::{count_utf16_code_units, plain_attributes, Attribute, Attributes, Delta, DeltaBuilder, DeltaIter};
 
 use crate::services::{doc::extensions::InsertExt, util::is_whitespace};
 
@@ -70,7 +68,7 @@ impl AutoFormatter {
             AutoFormatter::Url(url) => url.to_string(),
         };
 
-        num_chars(s.as_bytes())
+        count_utf16_code_units(&s)
     }
 }
 

+ 7 - 3
rust-lib/flowy-document/src/services/doc/view.rs

@@ -1,4 +1,5 @@
 use super::extensions::*;
+use crate::services::doc::trim;
 use flowy_ot::{
     core::{Attribute, Delta, Interval},
     errors::{ErrorBuilder, OTError, OTErrorCode},
@@ -24,7 +25,8 @@ impl View {
     pub(crate) fn insert(&self, delta: &Delta, text: &str, interval: Interval) -> Result<Delta, OTError> {
         let mut new_delta = None;
         for ext in &self.insert_exts {
-            if let Some(delta) = ext.apply(delta, interval.size(), text, interval.start) {
+            if let Some(mut delta) = ext.apply(delta, interval.size(), text, interval.start) {
+                trim(&mut delta);
                 tracing::trace!("[{}]: applied, delta: {}", ext.ext_name(), delta);
                 new_delta = Some(delta);
                 break;
@@ -40,7 +42,8 @@ impl View {
     pub(crate) fn delete(&self, delta: &Delta, interval: Interval) -> Result<Delta, OTError> {
         let mut new_delta = None;
         for ext in &self.delete_exts {
-            if let Some(delta) = ext.apply(delta, interval) {
+            if let Some(mut delta) = ext.apply(delta, interval) {
+                trim(&mut delta);
                 tracing::trace!("[{}]: applied, delta: {}", ext.ext_name(), delta);
                 new_delta = Some(delta);
                 break;
@@ -56,7 +59,8 @@ impl View {
     pub(crate) fn format(&self, delta: &Delta, attribute: Attribute, interval: Interval) -> Result<Delta, OTError> {
         let mut new_delta = None;
         for ext in &self.format_exts {
-            if let Some(delta) = ext.apply(delta, interval, &attribute) {
+            if let Some(mut delta) = ext.apply(delta, interval, &attribute) {
+                trim(&mut delta);
                 tracing::trace!("[{}]: applied, delta: {}", ext.ext_name(), delta);
                 new_delta = Some(delta);
                 break;

+ 21 - 2
rust-lib/flowy-document/tests/editor/attribute_test.rs

@@ -1,8 +1,8 @@
 #![cfg_attr(rustfmt, rustfmt::skip)]
 use crate::editor::{TestBuilder, TestOp::*};
 use flowy_document::services::doc::{FlowyDoc, PlainDoc};
-use flowy_ot::core::{Delta, Interval, OperationTransformable, NEW_LINE, WHITESPACE};
-
+use flowy_ot::core::{Delta, Interval, OperationTransformable, NEW_LINE, WHITESPACE, FlowyStr};
+use unicode_segmentation::UnicodeSegmentation;
 
 #[test]
 fn attributes_bold_added() {
@@ -719,6 +719,25 @@ fn attributes_preserve_header_format_on_merge() {
     TestBuilder::new().run_script::<FlowyDoc>(ops);
 }
 
+// Formats text containing an emoji, with the interval length taken from the
+// UTF-16 code-unit count: "👋 " is 3 code units but only 2 grapheme clusters
+// (both facts are asserted below).
+#[test]
+fn attributes_format_emoji() {
+    let emoji_s = "👋 ";
+    let s: FlowyStr = emoji_s.into();
+    let len = s.count_utf16_code_units();
+    assert_eq!(3, len);
+    assert_eq!(2, s.graphemes(true).count());
+    let ops = vec![
+        Insert(0, emoji_s, 0),
+        AssertDocJson(0, r#"[{"insert":"👋 \n"}]"#),
+        // The interval spans the whole inserted text, measured in code units.
+        Header(0, Interval::new(0, len), 1),
+        AssertDocJson(
+            0,
+            r#"[{"insert":"👋 "},{"insert":"\n","attributes":{"header":1}}]"#,
+        ),
+    ];
+    TestBuilder::new().run_script::<FlowyDoc>(ops);
+}
+
 #[test]
 fn attributes_preserve_list_format_on_merge() {
     let ops = vec![

+ 1 - 1
rust-lib/flowy-document/tests/editor/mod.rs

@@ -247,7 +247,7 @@ impl TestBuilder {
             },
             TestOp::DocComposeDelta(doc_index, delta_i) => {
                 let delta = self.deltas.get(*delta_i).unwrap().as_ref().unwrap();
-                self.documents[*doc_index].compose_delta(delta).unwrap();
+                self.documents[*doc_index].compose_delta(delta.clone()).unwrap();
             },
             TestOp::DocComposePrime(doc_index, prime_i) => {
                 let delta = self

+ 7 - 8
rust-lib/flowy-document/tests/editor/op_test.rs

@@ -1,5 +1,4 @@
 use crate::editor::{Rng, TestBuilder, TestOp::*};
-use bytecount::num_chars;
 use flowy_document::services::doc::{FlowyDoc, PlainDoc};
 use flowy_ot::core::*;
 
@@ -326,9 +325,9 @@ fn sequence() {
 fn apply_1000() {
     for _ in 0..1000 {
         let mut rng = Rng::default();
-        let s = rng.gen_string(50);
+        let s: FlowyStr = rng.gen_string(50).into();
         let delta = rng.gen_delta(&s);
-        assert_eq!(num_chars(s.as_bytes()), delta.base_len);
+        assert_eq!(s.count_utf16_code_units(), delta.base_len);
         assert_eq!(delta.apply(&s).unwrap().chars().count(), delta.target_len);
     }
 }
@@ -441,16 +440,16 @@ fn compose() {
         let mut rng = Rng::default();
         let s = rng.gen_string(20);
         let a = rng.gen_delta(&s);
-        let after_a = a.apply(&s).unwrap();
-        assert_eq!(a.target_len, num_chars(after_a.as_bytes()));
+        let after_a: FlowyStr = a.apply(&s).unwrap().into();
+        assert_eq!(a.target_len, after_a.count_utf16_code_units());
 
         let b = rng.gen_delta(&after_a);
-        let after_b = b.apply(&after_a).unwrap();
-        assert_eq!(b.target_len, num_chars(after_b.as_bytes()));
+        let after_b: FlowyStr = b.apply(&after_a).unwrap().into();
+        assert_eq!(b.target_len, after_b.count_utf16_code_units());
 
         let ab = a.compose(&b).unwrap();
         assert_eq!(ab.target_len, b.target_len);
-        let after_ab = ab.apply(&s).unwrap();
+        let after_ab: FlowyStr = ab.apply(&s).unwrap().into();
         assert_eq!(after_b, after_ab);
     }
 }

+ 1 - 0
rust-lib/flowy-ot/Cargo.toml

@@ -19,3 +19,4 @@ bytes = "1.0"
 
 
 
+

+ 12 - 12
rust-lib/flowy-ot/src/core/delta/delta.rs

@@ -1,8 +1,7 @@
 use crate::{
-    core::{attributes::*, operation::*, DeltaIter, Interval, OperationTransformable, MAX_IV_LEN},
+    core::{attributes::*, operation::*, DeltaIter, FlowyStr, Interval, OperationTransformable, MAX_IV_LEN},
     errors::{ErrorBuilder, OTError, OTErrorCode},
 };
-use bytecount::num_chars;
 use bytes::Bytes;
 use std::{
     cmp::{min, Ordering},
@@ -127,26 +126,27 @@ impl Delta {
     }
 
     pub fn insert(&mut self, s: &str, attributes: Attributes) {
+        let s: FlowyStr = s.into();
         if s.is_empty() {
             return;
         }
 
-        self.target_len += num_chars(s.as_bytes());
+        self.target_len += s.count_utf16_code_units();
         let new_last = match self.ops.as_mut_slice() {
             [.., Operation::Insert(insert)] => {
                 //
-                insert.merge_or_new_op(s, attributes)
+                insert.merge_or_new_op(&s, attributes)
             },
             [.., Operation::Insert(pre_insert), Operation::Delete(_)] => {
                 //
-                pre_insert.merge_or_new_op(s, attributes)
+                pre_insert.merge_or_new_op(&s, attributes)
             },
             [.., op_last @ Operation::Delete(_)] => {
                 let new_last = op_last.clone();
-                *op_last = OpBuilder::insert(s).attributes(attributes).build();
+                *op_last = OpBuilder::insert(&s).attributes(attributes).build();
                 Some(new_last)
             },
-            _ => Some(OpBuilder::insert(s).attributes(attributes).build()),
+            _ => Some(OpBuilder::insert(&s).attributes(attributes).build()),
         };
 
         match new_last {
@@ -173,7 +173,8 @@ impl Delta {
 
     /// Applies an operation to a string, returning a new string.
     pub fn apply(&self, s: &str) -> Result<String, OTError> {
-        if num_chars(s.as_bytes()) != self.base_len {
+        let s: FlowyStr = s.into();
+        if s.count_utf16_code_units() != self.base_len {
             return Err(ErrorBuilder::new(OTErrorCode::IncompatibleLength).build());
         }
         let mut new_s = String::new();
@@ -214,7 +215,7 @@ impl Delta {
                     }
                 },
                 Operation::Insert(insert) => {
-                    inverted.delete(insert.num_chars());
+                    inverted.delete(insert.count_of_code_units());
                 },
                 Operation::Delete(delete) => {
                     inverted.insert(&chars.take(*delete as usize).collect::<String>(), op.get_attributes());
@@ -325,12 +326,12 @@ impl OperationTransformable for Delta {
                 (Some(Operation::Insert(insert)), _) => {
                     // let composed_attrs = transform_attributes(&next_op1, &next_op2, true);
                     a_prime.insert(&insert.s, insert.attributes.clone());
-                    b_prime.retain(insert.num_chars(), insert.attributes.clone());
+                    b_prime.retain(insert.count_of_code_units(), insert.attributes.clone());
                     next_op1 = ops1.next();
                 },
                 (_, Some(Operation::Insert(o_insert))) => {
                     let composed_attrs = transform_op_attribute(&next_op1, &next_op2);
-                    a_prime.retain(o_insert.num_chars(), composed_attrs.clone());
+                    a_prime.retain(o_insert.count_of_code_units(), composed_attrs.clone());
                     b_prime.insert(&o_insert.s, composed_attrs);
                     next_op2 = ops2.next();
                 },
@@ -417,7 +418,6 @@ impl OperationTransformable for Delta {
                 },
             }
         }
-
         Ok((a_prime, b_prime))
     }
 

+ 1 - 1
rust-lib/flowy-ot/src/core/delta/iterator.rs

@@ -162,7 +162,7 @@ impl<'a> Iterator for AttributesIter<'a> {
             Operation::Insert(insert) => {
                 tracing::trace!("extend insert attributes with {} ", &insert.attributes);
                 attributes.extend(insert.attributes.clone());
-                length = insert.num_chars();
+                length = insert.count_of_code_units();
             },
         }
 

+ 264 - 0
rust-lib/flowy-ot/src/core/flowy_str.rs

@@ -0,0 +1,264 @@
+use serde::{de, de::Visitor, Deserialize, Deserializer, Serialize, Serializer};
+use std::{fmt, fmt::Formatter, slice};
+
+/// Newtype over `String` that adds helpers for measuring and slicing text
+/// by UTF-16 code units.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct FlowyStr(pub String);
+
+impl FlowyStr {
+    /// Number of UTF-16 code units needed to encode the wrapped string.
+    pub fn count_utf16_code_units(&self) -> usize { count_utf16_code_units(&self.0) }
+
+    /// Returns an iterator that starts at byte offset 0 of the string.
+    pub fn iter(&self) -> FlowyUtf16Iterator { FlowyUtf16Iterator::new(self, 0) }
+
+    /// Same selection as `with_interval`, but yields an empty `String`
+    /// instead of `None` when nothing is selected.
+    pub fn sub_str(&self, interval: Interval) -> String {
+        self.with_interval(interval).map(|s| s.0).unwrap_or_default()
+    }
+
+    /// Collects the bytes of every item produced by `CodePointIterator`
+    /// whose end offset (the iterator's `code_point_offset` after the item)
+    /// lies in `(interval.start, interval.end]`.
+    ///
+    /// Returns `None` when no bytes were selected or when the selected bytes
+    /// are not valid UTF-8.
+    pub fn with_interval(&self, interval: Interval) -> Option<FlowyStr> {
+        let mut cursor = CodePointIterator::new(self);
+        let mut selected: Vec<u8> = Vec::new();
+        // The offset is read from the iterator after each step, so the
+        // iterator cannot be handed over to a `for` loop here.
+        while let Some((chunk, _)) = cursor.next() {
+            let end_offset = cursor.code_point_offset;
+            if end_offset > interval.start && end_offset <= interval.end {
+                selected.extend_from_slice(chunk);
+            }
+        }
+
+        if selected.is_empty() {
+            return None;
+        }
+
+        str::from_utf8(&selected).ok().map(FlowyStr::from)
+    }
+}
+
+/// Iterator over a [`FlowyStr`] that yields `(byte slice, length)` pairs
+/// while tracking both a byte offset and a UTF-16 code-unit offset.
+pub struct CodePointIterator<'a> {
+    s: &'a FlowyStr,
+    // Byte index at which the next yielded slice starts.
+    bytes_offset: usize,
+    // Running total of UTF-16 code units for the bytes visited so far.
+    code_point_offset: usize,
+    // Number of bytes pulled out of `iter` so far.
+    iter_index: usize,
+    iter: slice::Iter<'a, u8>,
+}
+
+impl<'a> CodePointIterator<'a> {
+    /// Creates an iterator positioned at the start of `s` with every counter at zero.
+    pub fn new(s: &'a FlowyStr) -> Self {
+        Self {
+            iter: s.as_bytes().iter(),
+            iter_index: 0,
+            code_point_offset: 0,
+            bytes_offset: 0,
+            s,
+        }
+    }
+}
+
+impl<'a> Iterator for CodePointIterator<'a> {
+    /// The UTF-8 bytes of one character and the number of those bytes.
+    type Item = (&'a [u8], usize);
+
+    /// Yields the next character of the string as a byte slice plus its byte
+    /// length, or `None` once every byte has been consumed.
+    ///
+    /// After each call `bytes_offset` points just past the yielded character
+    /// and `code_point_offset` has grown by the number of UTF-16 code units
+    /// that character occupies (2 for a four-byte sequence, otherwise 1).
+    fn next(&mut self) -> Option<Self::Item> {
+        let start = self.bytes_offset;
+
+        // `iter` rests on a character boundary between calls, so the byte
+        // read here is always the lead byte of a character.
+        let &first = self.iter.next()?;
+        let len = len_utf8_from_first_byte(first);
+
+        // Consume the continuation bytes too. Stopping right after the lead
+        // byte of a 2- or 3-byte character left `iter` in the middle of that
+        // character, which made the following call return `None` early and
+        // cut iteration short at the first such character.
+        if len > 1 {
+            self.iter.nth(len - 2);
+        }
+        self.iter_index += len;
+        self.bytes_offset += len;
+
+        // A lead byte >= 0xf0 starts a four-byte sequence, which takes two
+        // UTF-16 code units; every other character takes one.
+        self.code_point_offset += if first >= 0xf0 { 2 } else { 1 };
+
+        let byte = &self.s.as_bytes()[start..self.bytes_offset];
+        Some((byte, len))
+    }
+}
+
+/// Exposes the wrapped `String` so its methods can be called directly on a `FlowyStr`.
+impl std::ops::Deref for FlowyStr {
+    type Target = String;
+
+    fn deref(&self) -> &String { &self.0 }
+}
+
+impl std::ops::DerefMut for FlowyStr {
+    fn deref_mut(&mut self) -> &mut String { &mut self.0 }
+}
+
+impl std::convert::From<String> for FlowyStr {
+    /// Wraps an owned string.
+    fn from(s: String) -> Self { Self(s) }
+}
+
+impl std::convert::From<&str> for FlowyStr {
+    /// Copies the slice into a new `FlowyStr`.
+    fn from(s: &str) -> Self { Self(s.to_owned()) }
+}
+
+impl std::fmt::Display for FlowyStr {
+    /// Writes the wrapped text to the formatter.
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { f.write_str(self.0.as_str()) }
+}
+
+impl std::ops::Add<&str> for FlowyStr {
+    type Output = FlowyStr;
+
+    /// Consumes `self` and returns it with `rhs` appended.
+    fn add(self, rhs: &str) -> FlowyStr { FlowyStr(self.0 + rhs) }
+}
+
+impl std::ops::AddAssign<&str> for FlowyStr {
+    /// Appends `rhs` in place.
+    fn add_assign(&mut self, rhs: &str) { self.0.push_str(rhs); }
+}
+
+impl Serialize for FlowyStr {
+    /// Serializes the wrapped text through `serialize_str`.
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        serializer.serialize_str(self.0.as_str())
+    }
+}
+
+impl<'de> Deserialize<'de> for FlowyStr {
+    /// Deserializes a string value into a `FlowyStr`.
+    fn deserialize<D>(deserializer: D) -> Result<FlowyStr, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        // Visitor that converts a visited `&str` into a `FlowyStr`.
+        struct StrVisitor;
+
+        impl<'de> Visitor<'de> for StrVisitor {
+            type Value = FlowyStr;
+
+            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("a str") }
+
+            fn visit_str<E>(self, s: &str) -> Result<Self::Value, E>
+            where
+                E: de::Error,
+            {
+                Ok(FlowyStr::from(s))
+            }
+        }
+
+        deserializer.deserialize_str(StrVisitor)
+    }
+}
+
+/// Walks a [`FlowyStr`] starting from a byte offset.
+pub struct FlowyUtf16Iterator<'a> {
+    s: &'a FlowyStr,
+    // Byte index of the next item to yield.
+    offset: usize,
+}
+
+impl<'a> FlowyUtf16Iterator<'a> {
+    /// Creates an iterator over `s` that begins at byte index `offset`.
+    pub fn new(s: &'a FlowyStr, offset: usize) -> Self {
+        Self { s, offset }
+    }
+}
+
+use crate::core::Interval;
+use std::str;
+
+impl<'a> Iterator for FlowyUtf16Iterator<'a> {
+    /// The bytes of one UTF-8 sequence, converted to an owned `String`.
+    type Item = String;
+
+    /// Returns the sequence that starts at the current byte offset and moves
+    /// the offset past it. Yields `None` once the offset equals the string's
+    /// byte length, or when the selected bytes are not valid UTF-8.
+    fn next(&mut self) -> Option<Self::Item> {
+        let bytes = self.s.as_bytes();
+        if self.offset == bytes.len() {
+            return None;
+        }
+
+        // The width of the sequence is derived from its first byte.
+        let start = self.offset;
+        let width = len_utf8_from_first_byte(bytes[start]);
+        let chunk = &bytes[start..start + width];
+        self.offset = start + width;
+
+        str::from_utf8(chunk).ok().map(|item| item.to_string())
+    }
+}
+
+/// Counts the UTF-16 code units needed to encode `s`.
+///
+/// Each byte that is not a UTF-8 continuation byte starts a character and
+/// contributes one unit; a lead byte `>= 0xf0` (four-byte sequence)
+/// contributes a second unit.
+pub fn count_utf16_code_units(s: &str) -> usize {
+    s.bytes()
+        .map(|b| usize::from((b as i8) >= -0x40) + usize::from(b >= 0xf0))
+        .sum()
+}
+
+/// Returns how many bytes a UTF-8 encoded codepoint occupies, judged from
+/// its initial byte `b`.
+///
+/// `b` is expected to be a lead byte; a continuation byte (`0x80..=0xbf`)
+/// falls into the two-byte arm.
+/// RFC reference : https://tools.ietf.org/html/rfc3629#section-4
+pub fn len_utf8_from_first_byte(b: u8) -> usize {
+    match b {
+        0x00..=0x7f => 1,
+        0x80..=0xdf => 2,
+        0xe0..=0xef => 3,
+        0xf0..=0xff => 4,
+    }
+}
+
+// Unit tests for `FlowyStr` iteration and interval slicing on emoji text.
+#[cfg(test)]
+mod tests {
+    use crate::core::{FlowyStr, Interval};
+
+    // `iter()` yields one whole emoji per step and then `None`.
+    #[test]
+    fn flowy_str_utf16_test() {
+        let s: FlowyStr = "👋😁👋😁".into();
+        let mut iter = s.iter();
+        assert_eq!(iter.next().unwrap(), "👋".to_string());
+        assert_eq!(iter.next().unwrap(), "😁".to_string());
+        assert_eq!(iter.next().unwrap(), "👋".to_string());
+        assert_eq!(iter.next().unwrap(), "😁".to_string());
+        assert_eq!(iter.next(), None);
+    }
+
+    // `skip`/`take` on the iterator count whole emoji, not bytes.
+    #[test]
+    fn flowy_str_utf16_iter_test() {
+        let s: FlowyStr = "👋👋😁😁👋👋".into();
+        let iter = s.iter();
+        let result = iter.skip(2).take(2).collect::<String>();
+        assert_eq!(result, "😁😁".to_string());
+    }
+
+    // Interval bounds are UTF-16 code-unit offsets: each emoji spans two
+    // units (0..2 and 4..6), the space and the newline one unit each.
+    #[test]
+    fn flowy_str_code_point_test() {
+        let s: FlowyStr = "👋 \n👋".into();
+        let output = s.with_interval(Interval::new(0, 2)).unwrap().0;
+        assert_eq!(output, "👋");
+
+        let output = s.with_interval(Interval::new(2, 3)).unwrap().0;
+        assert_eq!(output, " ");
+
+        let output = s.with_interval(Interval::new(3, 4)).unwrap().0;
+        assert_eq!(output, "\n");
+
+        let output = s.with_interval(Interval::new(4, 6)).unwrap().0;
+        assert_eq!(output, "👋");
+    }
+}

+ 2 - 0
rust-lib/flowy-ot/src/core/mod.rs

@@ -1,11 +1,13 @@
 mod attributes;
 mod delta;
+mod flowy_str;
 mod interval;
 mod operation;
 
 use crate::errors::OTError;
 pub use attributes::*;
 pub use delta::*;
+pub use flowy_str::*;
 pub use interval::*;
 pub use operation::*;
 

+ 40 - 20
rust-lib/flowy-ot/src/core/operation/operation.rs

@@ -1,11 +1,9 @@
-use crate::core::{Attribute, Attributes, Interval, OpBuilder};
-use bytecount::num_chars;
+use crate::core::{Attribute, Attributes, FlowyStr, Interval, OpBuilder};
 use serde::__private::Formatter;
 use std::{
     cmp::min,
     fmt,
     ops::{Deref, DerefMut},
-    str::Chars,
 };
 
 #[derive(Debug, Clone, Eq, PartialEq)]
@@ -47,11 +45,12 @@ impl Operation {
     }
 
     pub fn len(&self) -> usize {
-        match self {
+        let len = match self {
             Operation::Delete(n) => *n,
             Operation::Retain(r) => r.n,
-            Operation::Insert(i) => i.num_chars(),
-        }
+            Operation::Insert(i) => i.count_of_code_units(),
+        };
+        len
     }
 
     pub fn is_empty(&self) -> bool { self.len() == 0 }
@@ -78,7 +77,7 @@ impl Operation {
                         .build(),
                 );
                 right = Some(
-                    OpBuilder::insert(&insert.s[index..insert.num_chars()])
+                    OpBuilder::insert(&insert.s[index..insert.count_of_code_units()])
                         .attributes(attributes)
                         .build(),
                 );
@@ -95,13 +94,18 @@ impl Operation {
                 .attributes(retain.attributes.clone())
                 .build(),
             Operation::Insert(insert) => {
-                if interval.start > insert.num_chars() {
+                if interval.start > insert.count_of_code_units() {
                     OpBuilder::insert("").build()
                 } else {
-                    let chars = insert.chars().skip(interval.start);
-                    let s = &chars.take(min(interval.size(), insert.num_chars())).collect::<String>();
-
-                    OpBuilder::insert(s).attributes(insert.attributes.clone()).build()
+                    // let s = &insert
+                    //     .s
+                    //     .chars()
+                    //     .skip(interval.start)
+                    //     .take(min(interval.size(), insert.count_of_code_units()))
+                    //     .collect::<String>();
+
+                    let s = insert.s.sub_str(interval);
+                    OpBuilder::insert(&s).attributes(insert.attributes.clone()).build()
                 }
             },
         };
@@ -132,6 +136,14 @@ impl Operation {
         }
         false
     }
+
+    /// Reports whether the operation is plain: a delete always is, while a
+    /// retain or an insert defers to its own `is_plain()`.
+    pub fn is_plain(&self) -> bool {
+        match self {
+            Operation::Insert(insert) => insert.is_plain(),
+            Operation::Retain(retain) => retain.is_plain(),
+            Operation::Delete(_) => true,
+        }
+    }
 }
 
 impl fmt::Display for Operation {
@@ -212,7 +224,7 @@ impl DerefMut for Retain {
 #[derive(Clone, Debug, Eq, PartialEq, serde::Serialize, serde::Deserialize)]
 pub struct Insert {
     #[serde(rename(serialize = "insert", deserialize = "insert"))]
-    pub s: String,
+    pub s: FlowyStr,
 
     #[serde(skip_serializing_if = "is_empty")]
     pub attributes: Attributes,
@@ -224,7 +236,7 @@ impl fmt::Display for Insert {
         if s.ends_with("\n") {
             s.pop();
             if s.is_empty() {
-                s = "new_line".to_owned();
+                s = "new_line".into();
             }
         }
 
@@ -237,11 +249,7 @@ impl fmt::Display for Insert {
 }
 
 impl Insert {
-    pub fn as_bytes(&self) -> &[u8] { self.s.as_bytes() }
-
-    pub fn chars(&self) -> Chars<'_> { self.s.chars() }
-
-    pub fn num_chars(&self) -> usize { num_chars(self.s.as_bytes()) as _ }
+    /// Length of the inserted text in UTF-16 code units.
+    pub fn count_of_code_units(&self) -> usize {
+        let text = &self.s;
+        text.count_utf16_code_units()
+    }
 
     pub fn merge_or_new_op(&mut self, s: &str, attributes: Attributes) -> Option<Operation> {
         if self.attributes == attributes {
@@ -251,12 +259,14 @@ impl Insert {
             Some(OpBuilder::insert(s).attributes(attributes).build())
         }
     }
+
+    /// True when this insert carries no attributes.
+    pub fn is_plain(&self) -> bool {
+        let attributes = &self.attributes;
+        attributes.is_empty()
+    }
 }
 
 impl std::convert::From<String> for Insert {
     fn from(s: String) -> Self {
         Insert {
-            s,
+            s: s.into(),
             attributes: Attributes::default(),
         }
     }
@@ -265,4 +275,14 @@ impl std::convert::From<String> for Insert {
 impl std::convert::From<&str> for Insert {
     fn from(s: &str) -> Self { Insert::from(s.to_owned()) }
 }
+
+impl std::convert::From<FlowyStr> for Insert {
+    /// Builds an insert of `s` with default attributes.
+    fn from(s: FlowyStr) -> Self {
+        let attributes = Attributes::default();
+        Self { s, attributes }
+    }
+}
+
 fn is_empty(attributes: &Attributes) -> bool { attributes.is_empty() }

+ 1 - 1
rust-lib/flowy-user-infra/Cargo.toml

@@ -9,7 +9,7 @@ edition = "2018"
 flowy-derive = { path = "../flowy-derive" }
 protobuf = {version = "2.18.0"}
 bytes = "1.0"
-unicode-segmentation = "1.7.1"
+unicode-segmentation = "1.8"
 derive_more = {version = "0.99", features = ["display"]}
 validator = "0.12.0"
 log = "0.4.14"

+ 1 - 1
rust-lib/flowy-workspace-infra/Cargo.toml

@@ -9,7 +9,7 @@ edition = "2018"
 flowy-derive = { path = "../flowy-derive" }
 protobuf = {version = "2.18.0"}
 bytes = "1.0"
-unicode-segmentation = "1.7.1"
+unicode-segmentation = "1.8"
 strum = "0.21"
 strum_macros = "0.21"
 derive_more = {version = "0.99", features = ["display"]}