rev_persistence.rs

use crate::cache::{
    disk::{RevisionChangeset, RevisionDiskCache},
    memory::RevisionMemoryCacheDelegate,
};
use crate::disk::{RevisionState, SyncRecord};
use crate::memory::RevisionMemoryCache;
use crate::RevisionMergeable;
use flowy_error::{internal_error, FlowyError, FlowyResult};
use flowy_sync::entities::revision::{Revision, RevisionRange};
use std::collections::VecDeque;
use std::{borrow::Cow, sync::Arc};
use tokio::sync::RwLock;
use tokio::task::spawn_blocking;

pub const REVISION_WRITE_INTERVAL_IN_MILLIS: u64 = 600;

pub struct RevisionPersistenceConfiguration {
    merge_threshold: usize,
}

impl RevisionPersistenceConfiguration {
    pub fn new(merge_threshold: usize) -> Self {
        debug_assert!(merge_threshold > 1);
        if merge_threshold > 1 {
            Self { merge_threshold }
        } else {
            Self { merge_threshold: 2 }
        }
    }
}

impl std::default::Default for RevisionPersistenceConfiguration {
    fn default() -> Self {
        Self { merge_threshold: 2 }
    }
}
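// Usage sketch (illustrative, not from the original file): `merge_threshold` controls how many
// pending local revisions may accumulate before `add_sync_revision` merges them into one.
// Values <= 1 trip the debug assertion in debug builds and are replaced by the default of 2 otherwise.
//
//   let configuration = RevisionPersistenceConfiguration::new(4);
//   let default_configuration = RevisionPersistenceConfiguration::default(); // merge_threshold = 2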
pub struct RevisionPersistence<Connection> {
    user_id: String,
    object_id: String,
    disk_cache: Arc<dyn RevisionDiskCache<Connection, Error = FlowyError>>,
    memory_cache: Arc<RevisionMemoryCache>,
    sync_seq: RwLock<DeferSyncSequence>,
    configuration: RevisionPersistenceConfiguration,
}

impl<Connection> RevisionPersistence<Connection>
where
    Connection: 'static,
{
    pub fn new<C>(
        user_id: &str,
        object_id: &str,
        disk_cache: C,
        configuration: RevisionPersistenceConfiguration,
    ) -> RevisionPersistence<Connection>
    where
        C: 'static + RevisionDiskCache<Connection, Error = FlowyError>,
    {
        let disk_cache = Arc::new(disk_cache) as Arc<dyn RevisionDiskCache<Connection, Error = FlowyError>>;
        Self::from_disk_cache(user_id, object_id, disk_cache, configuration)
    }

    pub fn from_disk_cache(
        user_id: &str,
        object_id: &str,
        disk_cache: Arc<dyn RevisionDiskCache<Connection, Error = FlowyError>>,
        configuration: RevisionPersistenceConfiguration,
    ) -> RevisionPersistence<Connection> {
        let object_id = object_id.to_owned();
        let user_id = user_id.to_owned();
        let sync_seq = RwLock::new(DeferSyncSequence::new());
        let memory_cache = Arc::new(RevisionMemoryCache::new(&object_id, Arc::new(disk_cache.clone())));
        Self {
            user_id,
            object_id,
            disk_cache,
            memory_cache,
            sync_seq,
            configuration,
        }
    }
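    // Construction sketch (illustrative; `SqliteRevisionDiskCache` is a hypothetical type that
    // implements `RevisionDiskCache<Connection, Error = FlowyError>`):
    //
    //   let configuration = RevisionPersistenceConfiguration::new(2);
    //   let persistence =
    //       RevisionPersistence::new(user_id, object_id, SqliteRevisionDiskCache::new(pool), configuration);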
    /// Saves a revision that comes from the remote to disk.
    #[tracing::instrument(level = "trace", skip(self, revision), fields(rev_id, object_id=%self.object_id), err)]
    pub(crate) async fn add_ack_revision(&self, revision: &Revision) -> FlowyResult<()> {
        tracing::Span::current().record("rev_id", &revision.rev_id);
        self.add(revision.clone(), RevisionState::Ack, true).await
    }

    /// Appends a revision that already exists in the local DB to the sync sequence.
    #[tracing::instrument(level = "trace", skip(self), fields(rev_id, object_id=%self.object_id), err)]
    pub(crate) async fn sync_revision(&self, revision: &Revision) -> FlowyResult<()> {
        tracing::Span::current().record("rev_id", &revision.rev_id);
        self.add(revision.clone(), RevisionState::Sync, false).await?;
        self.sync_seq.write().await.dry_push(revision.rev_id)?;
        Ok(())
    }
    /// Saves the revision to disk and appends it to the end of the sync sequence.
    #[tracing::instrument(level = "trace", skip_all, fields(rev_id, compact_range, object_id=%self.object_id), err)]
    pub(crate) async fn add_sync_revision<'a>(
        &'a self,
        new_revision: &'a Revision,
        rev_compress: &Arc<dyn RevisionMergeable + 'a>,
    ) -> FlowyResult<i64> {
        let mut sync_seq = self.sync_seq.write().await;
        let step = sync_seq.step;
        // Before pushing new_revision into the sync_seq, check whether the current `step` of the
        // sync_seq has reached `merge_threshold - 1`. If it has, the pending revisions and the
        // new_revision are merged into a single revision.
        if step >= self.configuration.merge_threshold - 1 {
            let compact_seq = sync_seq.compact();
            let range = RevisionRange {
                start: *compact_seq.front().unwrap(),
                end: *compact_seq.back().unwrap(),
            };
            tracing::Span::current().record("compact_range", &format!("{}", range).as_str());
            let mut revisions = self.revisions_in_range(&range).await?;
            debug_assert_eq!(range.len() as usize, revisions.len());
            // Append the new revision.
            revisions.push(new_revision.clone());
            // Merge the collected revisions into one.
            let merged_revision = rev_compress.merge_revisions(&self.user_id, &self.object_id, revisions)?;
            let rev_id = merged_revision.rev_id;
            tracing::Span::current().record("rev_id", &merged_revision.rev_id);
            sync_seq.dry_push(merged_revision.rev_id)?;
            // Replace the revisions in the range with the merged revision.
            self.compact(&range, merged_revision).await?;
            Ok(rev_id)
        } else {
            tracing::Span::current().record("rev_id", &new_revision.rev_id);
            self.add(new_revision.clone(), RevisionState::Sync, true).await?;
            sync_seq.push(new_revision.rev_id)?;
            Ok(new_revision.rev_id)
        }
    }
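    // Worked example (illustrative): with merge_threshold = 2, the first `add_sync_revision` call
    // sees step = 0 < 1, so the revision is stored as-is and step becomes 1. The next call sees
    // step = 1 >= 1, so the pending revision and the new one are merged into a single revision
    // that replaces both on disk and in the sync sequence.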
    /// Removes the revision with `rev_id` from the sync sequence.
    pub(crate) async fn ack_revision(&self, rev_id: i64) -> FlowyResult<()> {
        if self.sync_seq.write().await.ack(&rev_id).is_ok() {
            self.memory_cache.ack(&rev_id).await;
        }
        Ok(())
    }

    pub(crate) async fn next_sync_revision(&self) -> FlowyResult<Option<Revision>> {
        match self.sync_seq.read().await.next_rev_id() {
            None => Ok(None),
            Some(rev_id) => Ok(self.get(rev_id).await.map(|record| record.revision)),
        }
    }

    pub(crate) async fn next_sync_rev_id(&self) -> Option<i64> {
        self.sync_seq.read().await.next_rev_id()
    }

    pub(crate) fn number_of_sync_records(&self) -> usize {
        self.memory_cache.number_of_sync_records()
    }
    /// The cache gets reset when it conflicts with the remote revisions.
    #[tracing::instrument(level = "trace", skip(self, revisions), err)]
    pub(crate) async fn reset(&self, revisions: Vec<Revision>) -> FlowyResult<()> {
        let records = revisions
            .into_iter()
            .map(|revision| SyncRecord {
                revision,
                state: RevisionState::Sync,
                write_to_disk: false,
            })
            .collect::<Vec<_>>();
        self.disk_cache
            .delete_and_insert_records(&self.object_id, None, records.clone())?;
        let _ = self.memory_cache.reset_with_revisions(records).await;
        self.sync_seq.write().await.clear();
        Ok(())
    }
    async fn add(&self, revision: Revision, state: RevisionState, write_to_disk: bool) -> FlowyResult<()> {
        if self.memory_cache.contains(&revision.rev_id) {
            tracing::warn!("Duplicate revision: {}:{}-{:?}", self.object_id, revision.rev_id, state);
            return Ok(());
        }
        let record = SyncRecord {
            revision,
            state,
            write_to_disk,
        };
        self.memory_cache.add(Cow::Owned(record)).await;
        Ok(())
    }

    async fn compact(&self, range: &RevisionRange, new_revision: Revision) -> FlowyResult<()> {
        self.memory_cache.remove_with_range(range);
        let rev_ids = range.to_rev_ids();
        self.disk_cache
            .delete_revision_records(&self.object_id, Some(rev_ids))?;
        self.add(new_revision, RevisionState::Sync, true).await?;
        Ok(())
    }
    pub async fn get(&self, rev_id: i64) -> Option<SyncRecord> {
        match self.memory_cache.get(&rev_id).await {
            None => match self
                .disk_cache
                .read_revision_records(&self.object_id, Some(vec![rev_id]))
            {
                Ok(mut records) => {
                    let record = records.pop()?;
                    assert!(records.is_empty());
                    Some(record)
                }
                Err(e) => {
                    tracing::error!("{}", e);
                    None
                }
            },
            Some(revision) => Some(revision),
        }
    }

    pub fn batch_get(&self, doc_id: &str) -> FlowyResult<Vec<SyncRecord>> {
        self.disk_cache.read_revision_records(doc_id, None)
    }

    // Reads the revisions with rev_id >= range.start && rev_id <= range.end.
    pub async fn revisions_in_range(&self, range: &RevisionRange) -> FlowyResult<Vec<Revision>> {
        let range = range.clone();
        let mut records = self.memory_cache.get_with_range(&range).await?;
        let range_len = range.len() as usize;
        if records.len() != range_len {
            let disk_cache = self.disk_cache.clone();
            let object_id = self.object_id.clone();
            records = spawn_blocking(move || disk_cache.read_revision_records_with_range(&object_id, &range))
                .await
                .map_err(internal_error)??;
            if records.len() != range_len {
                tracing::error!("Expected {} revisions, but received {}", range_len, records.len());
            }
        }
        Ok(records
            .into_iter()
            .map(|record| record.revision)
            .collect::<Vec<Revision>>())
    }
}
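// Draining sketch (illustrative; `send_to_server` is a hypothetical async function): revisions
// are taken from the head of the sync sequence one at a time and acknowledged once the remote
// confirms them, which pops them from `DeferSyncSequence` and marks them as acked in the caches.
//
//   while let Some(revision) = persistence.next_sync_revision().await? {
//       send_to_server(&revision).await?;
//       persistence.ack_revision(revision.rev_id).await?;
//   }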
impl<C> RevisionMemoryCacheDelegate for Arc<dyn RevisionDiskCache<C, Error = FlowyError>> {
    fn send_sync(&self, mut records: Vec<SyncRecord>) -> FlowyResult<()> {
        records.retain(|record| record.write_to_disk);
        if !records.is_empty() {
            tracing::Span::current().record(
                "checkpoint_result",
                &format!("{} records were saved", records.len()).as_str(),
            );
            self.create_revision_records(records)?;
        }
        Ok(())
    }

    fn receive_ack(&self, object_id: &str, rev_id: i64) {
        let changeset = RevisionChangeset {
            object_id: object_id.to_string(),
            rev_id: rev_id.into(),
            state: RevisionState::Ack,
        };
        match self.update_revision_record(vec![changeset]) {
            Ok(_) => {}
            Err(e) => tracing::error!("{}", e),
        }
    }
}
#[derive(Default)]
struct DeferSyncSequence {
    /// The rev_ids waiting to be synchronized, in ascending order.
    rev_ids: VecDeque<i64>,
    /// Index into `rev_ids` of the first revision that may be merged on the next compaction.
    start: Option<usize>,
    /// Number of revisions pushed via `push` since the last compaction.
    step: usize,
}

impl DeferSyncSequence {
    fn new() -> Self {
        DeferSyncSequence::default()
    }

    fn push(&mut self, new_rev_id: i64) -> FlowyResult<()> {
        self.dry_push(new_rev_id)?;
        self.step += 1;
        if self.start.is_none() && !self.rev_ids.is_empty() {
            self.start = Some(self.rev_ids.len() - 1);
        }
        Ok(())
    }

    fn dry_push(&mut self, new_rev_id: i64) -> FlowyResult<()> {
        // The new rev_id must be greater than the last rev_id in the sequence.
        if let Some(rev_id) = self.rev_ids.back() {
            if *rev_id >= new_rev_id {
                return Err(
                    FlowyError::internal().context(format!("The new revision's id must be greater than {}", rev_id)),
                );
            }
        }
        self.rev_ids.push_back(new_rev_id);
        Ok(())
    }

    fn ack(&mut self, rev_id: &i64) -> FlowyResult<()> {
        let cur_rev_id = self.rev_ids.front().cloned();
        if let Some(pop_rev_id) = cur_rev_id {
            if &pop_rev_id != rev_id {
                let desc = format!(
                    "The ack rev_id:{} is not equal to the current rev_id:{}",
                    rev_id, pop_rev_id
                );
                return Err(FlowyError::internal().context(desc));
            }
            let _ = self.rev_ids.pop_front();
        }
        Ok(())
    }

    fn next_rev_id(&self) -> Option<i64> {
        self.rev_ids.front().cloned()
    }

    fn clear(&mut self) {
        self.start = None;
        self.step = 0;
        self.rev_ids.clear();
    }

    // Compacts the deferred rev_ids into one batch, except the rev_id that is currently being
    // synchronized, and resets the merge bookkeeping.
    fn compact(&mut self) -> VecDeque<i64> {
        if self.start.is_none() {
            return VecDeque::default();
        }
        let start = self.start.unwrap();
        let compact_seq = self.rev_ids.split_off(start);
        self.start = None;
        self.step = 0;
        compact_seq
    }
}
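
// A minimal test sketch for `DeferSyncSequence` (added for illustration; not part of the original
// module). It exercises the strictly increasing push contract, head-only acking, and the
// compact/step bookkeeping defined above.
#[cfg(test)]
mod defer_sync_sequence_tests {
    use super::DeferSyncSequence;
    use std::collections::VecDeque;

    #[test]
    fn push_ack_and_compact() {
        let mut seq = DeferSyncSequence::new();

        // rev_ids must be strictly increasing; pushing a duplicate or smaller id is rejected.
        assert!(seq.push(1).is_ok());
        assert!(seq.push(2).is_ok());
        assert!(seq.dry_push(2).is_err());

        // Only the head of the sequence can be acknowledged.
        assert!(seq.ack(&2).is_err());
        assert!(seq.ack(&1).is_ok());
        assert_eq!(seq.next_rev_id(), Some(2));

        // Compacting drains the deferred ids and resets the merge bookkeeping.
        assert!(seq.push(3).is_ok());
        let compacted = seq.compact();
        assert_eq!(compacted, VecDeque::from(vec![2, 3]));
        assert_eq!(seq.step, 0);
        assert_eq!(seq.next_rev_id(), None);
    }
}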