rev_persistence.rs 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485
  1. use crate::cache::memory::RevisionMemoryCacheDelegate;
  2. use crate::memory::RevisionMemoryCache;
  3. use crate::RevisionMergeable;
  4. use flowy_error::{internal_error, FlowyError, FlowyResult};
  5. use flowy_revision_persistence::{RevisionChangeset, RevisionDiskCache, RevisionState, SyncRecord};
  6. use revision_model::{Revision, RevisionRange};
  7. use std::collections::{HashMap, VecDeque};
  8. use std::{borrow::Cow, sync::Arc};
  9. use tokio::sync::RwLock;
  10. use tokio::task::spawn_blocking;
  11. pub const REVISION_WRITE_INTERVAL_IN_MILLIS: u64 = 600;
  12. #[derive(Clone)]
  13. pub struct RevisionPersistenceConfiguration {
  14. // If the number of revisions that didn't sync to the server greater than the max_merge_len
  15. // then these revisions will be merged into one revision.
  16. max_merge_len: usize,
  17. /// Indicates that the revisions that didn't sync to the server can be merged into one when
  18. /// `merge_lagging_revisions` get called.
  19. merge_lagging: bool,
  20. }
  21. impl RevisionPersistenceConfiguration {
  22. pub fn new(merge_max_length: usize, merge_lagging: bool) -> Self {
  23. debug_assert!(merge_max_length > 1);
  24. if merge_max_length > 1 {
  25. Self {
  26. max_merge_len: merge_max_length,
  27. merge_lagging,
  28. }
  29. } else {
  30. Self {
  31. max_merge_len: 100,
  32. merge_lagging,
  33. }
  34. }
  35. }
  36. }
  37. impl std::default::Default for RevisionPersistenceConfiguration {
  38. fn default() -> Self {
  39. Self {
  40. max_merge_len: 100,
  41. merge_lagging: false,
  42. }
  43. }
  44. }
/// The persistence layer for revisions, combining a memory cache with a disk cache.
/// The generic parameter, `Connection`, represents the disk backend's connection;
/// if the backend is SQLite, then the connection will be a SQLite connection.
pub struct RevisionPersistence<Connection> {
    // Owner of these revisions.
    user_id: String,
    // The document/grid object the revisions belong to.
    object_id: String,
    // Durable store; all reads fall back here when the memory cache misses.
    disk_cache: Arc<dyn RevisionDiskCache<Connection, Error = FlowyError>>,
    // Fast in-memory cache layered in front of `disk_cache`.
    memory_cache: Arc<RevisionMemoryCache>,
    // Queue of local revisions still awaiting server acknowledgement.
    sync_seq: RwLock<DeferSyncSequence>,
    // Merge behavior settings (see `RevisionPersistenceConfiguration`).
    configuration: RevisionPersistenceConfiguration,
}
impl<Connection> RevisionPersistence<Connection>
where
    Connection: 'static,
{
    /// Creates a persistence layer from a concrete disk-cache type by erasing
    /// it to a trait object, then delegating to `from_disk_cache`.
    pub fn new<C>(
        user_id: &str,
        object_id: &str,
        disk_cache: C,
        configuration: RevisionPersistenceConfiguration,
    ) -> RevisionPersistence<Connection>
    where
        C: 'static + RevisionDiskCache<Connection, Error = FlowyError>,
    {
        let disk_cache =
            Arc::new(disk_cache) as Arc<dyn RevisionDiskCache<Connection, Error = FlowyError>>;
        Self::from_disk_cache(user_id, object_id, disk_cache, configuration)
    }

    /// Creates a persistence layer around an already type-erased disk cache.
    pub fn from_disk_cache(
        user_id: &str,
        object_id: &str,
        disk_cache: Arc<dyn RevisionDiskCache<Connection, Error = FlowyError>>,
        configuration: RevisionPersistenceConfiguration,
    ) -> RevisionPersistence<Connection> {
        let object_id = object_id.to_owned();
        let user_id = user_id.to_owned();
        let sync_seq = RwLock::new(DeferSyncSequence::new());
        // The memory cache flushes back to disk through the
        // `RevisionMemoryCacheDelegate` impl on `Arc<dyn RevisionDiskCache<_>>`,
        // hence the extra `Arc::new` around a clone of the disk-cache handle.
        let memory_cache = Arc::new(RevisionMemoryCache::new(
            &object_id,
            Arc::new(disk_cache.clone()),
        ));
        Self {
            user_id,
            object_id,
            disk_cache,
            memory_cache,
            sync_seq,
            configuration,
        }
    }

    /// Save the revision that comes from remote to disk.
    /// Remote revisions are already acknowledged, so they are stored in the
    /// `Ack` state and flagged for a disk write.
    #[tracing::instrument(level = "trace", skip(self, revision), fields(rev_id, object_id=%self.object_id), err)]
    pub(crate) async fn add_ack_revision(&self, revision: &Revision) -> FlowyResult<()> {
        tracing::Span::current().record("rev_id", revision.rev_id);
        self.add(revision.clone(), RevisionState::Ack, true).await
    }

    /// Merges every currently-pending mergeable revision into a single one and
    /// swaps them on disk. No-op unless `merge_lagging` is enabled in the
    /// configuration.
    #[tracing::instrument(level = "trace", skip_all, err)]
    pub async fn merge_lagging_revisions<'a>(
        &'a self,
        rev_compress: &Arc<dyn RevisionMergeable + 'a>,
    ) -> FlowyResult<()> {
        if !self.configuration.merge_lagging {
            return Ok(());
        }
        // Hold the write lock for the whole merge so no new revision interleaves.
        let mut sync_seq = self.sync_seq.write().await;
        let compact_seq = sync_seq.pop_merge_rev_ids();
        if !compact_seq.is_empty() {
            // Non-empty, so front()/back() cannot fail.
            let range = RevisionRange {
                start: *compact_seq.front().unwrap(),
                end: *compact_seq.back().unwrap(),
            };
            let revisions = self.revisions_in_range(&range).await?;
            let rev_ids = range.to_rev_ids();
            debug_assert_eq!(range.len() as usize, revisions.len());
            // compact multiple revisions into one
            let merged_revision =
                rev_compress.merge_revisions(&self.user_id, &self.object_id, revisions)?;
            tracing::Span::current().record("rev_id", merged_revision.rev_id);
            let record = SyncRecord {
                revision: merged_revision,
                state: RevisionState::Sync,
                write_to_disk: true,
            };
            // Swap the merged-away records for the single merged record on disk.
            self
                .disk_cache
                .delete_and_insert_records(&self.object_id, Some(rev_ids), vec![record])?;
        }
        Ok(())
    }

    /// Syncs each record's revision to remote if its state is `RevisionState::Sync`:
    /// the record is mirrored into the memory cache (without a disk write) and
    /// queued in the sync sequence.
    pub(crate) async fn sync_revision_records(&self, records: &[SyncRecord]) -> FlowyResult<()> {
        let mut sync_seq = self.sync_seq.write().await;
        for record in records {
            if record.state == RevisionState::Sync {
                self
                    .add(record.revision.clone(), RevisionState::Sync, false)
                    .await?;
                sync_seq.recv(record.revision.rev_id)?; // Sync the records if their state is RevisionState::Sync.
            }
        }
        Ok(())
    }

    /// Save the revision to disk and append it to the end of the sync sequence.
    /// The returned rev_id will differ from the passed-in revision's rev_id if
    /// multiple revisions are merged into one.
    #[tracing::instrument(level = "trace", skip_all, fields(rev_id, compact_range, object_id=%self.object_id), err)]
    pub(crate) async fn add_local_revision<'a>(
        &'a self,
        new_revision: Revision,
        rev_compress: &Arc<dyn RevisionMergeable + 'a>,
    ) -> FlowyResult<i64> {
        let mut sync_seq = self.sync_seq.write().await;
        // Before the new_revision is pushed into the sync_seq, check whether the
        // current `merge_length` of the sync_seq has reached `max_merge_len - 1`.
        // If so, the pending revisions need to be merged with the new_revision
        // into one revision.
        let mut merge_rev_ids = VecDeque::default();
        if sync_seq.merge_length >= self.configuration.max_merge_len - 1 {
            merge_rev_ids.extend(sync_seq.pop_merge_rev_ids());
        }
        if !merge_rev_ids.is_empty() {
            // Non-empty, so front()/back() cannot fail.
            let range = RevisionRange {
                start: *merge_rev_ids.front().unwrap(),
                end: *merge_rev_ids.back().unwrap(),
            };
            tracing::Span::current().record("compact_range", format!("{}", range).as_str());
            let mut revisions = self.revisions_in_range(&range).await?;
            debug_assert_eq!(range.len() as usize, revisions.len());
            // append the new revision
            revisions.push(new_revision);
            // compact multiple revisions into one
            let merged_revision =
                rev_compress.merge_revisions(&self.user_id, &self.object_id, revisions)?;
            let new_rev_id = merged_revision.rev_id;
            tracing::Span::current().record("rev_id", merged_revision.rev_id);
            sync_seq.recv(new_rev_id)?;
            // replace the revisions in range with compact revision
            self.compact(&range, merged_revision).await?;
            Ok(new_rev_id)
        } else {
            let rev_id = new_revision.rev_id;
            tracing::Span::current().record("rev_id", rev_id);
            self.add(new_revision, RevisionState::Sync, true).await?;
            // Mark this revision as a candidate for a future merge.
            sync_seq.merge_recv(rev_id)?;
            Ok(rev_id)
        }
    }

    /// Remove the revision with rev_id from the sync sequence and, on success,
    /// acknowledge it in the memory cache as well.
    pub(crate) async fn ack_revision(&self, rev_id: i64) -> FlowyResult<()> {
        if self.sync_seq.write().await.ack(&rev_id).is_ok() {
            self.memory_cache.ack(&rev_id).await;
        }
        Ok(())
    }

    /// Returns the next revision waiting to be synced, resolved to its full record.
    pub(crate) async fn next_sync_revision(&self) -> FlowyResult<Option<Revision>> {
        match self.sync_seq.read().await.next_rev_id() {
            None => Ok(None),
            Some(rev_id) => Ok(self.get(rev_id).await.map(|record| record.revision)),
        }
    }

    /// Returns the rev_id of the next revision waiting to be synced, if any.
    pub(crate) async fn next_sync_rev_id(&self) -> Option<i64> {
        self.sync_seq.read().await.next_rev_id()
    }

    /// Number of records in the memory cache that are in the `Sync` state.
    pub(crate) fn number_of_sync_records(&self) -> usize {
        self.memory_cache.number_of_sync_records()
    }

    /// Number of revision records stored in the disk cache for this object.
    /// Returns 0 (and logs the error) when the read fails.
    pub(crate) fn number_of_records_in_disk(&self) -> usize {
        match self.disk_cache.read_revision_records(&self.object_id, None) {
            Ok(records) => records.len(),
            Err(e) => {
                tracing::error!("Read revision records failed: {:?}", e);
                0
            },
        }
    }

    /// The cache gets reset while it conflicts with the remote revisions:
    /// replaces everything on disk with `revisions`, mirrors them in memory,
    /// and clears the pending sync sequence.
    #[tracing::instrument(level = "trace", skip(self, revisions), err)]
    pub(crate) async fn reset(&self, revisions: Vec<Revision>) -> FlowyResult<()> {
        let records = revisions
            .into_iter()
            .map(|revision| SyncRecord {
                revision,
                state: RevisionState::Sync,
                write_to_disk: false,
            })
            .collect::<Vec<_>>();
        self
            .disk_cache
            .delete_and_insert_records(&self.object_id, None, records.clone())?;
        self.memory_cache.reset_with_revisions(records).await;
        self.sync_seq.write().await.clear();
        Ok(())
    }

    /// Adds a single revision record to the memory cache, skipping duplicates.
    /// `write_to_disk` marks whether the record should later be flushed to disk.
    async fn add(
        &self,
        revision: Revision,
        state: RevisionState,
        write_to_disk: bool,
    ) -> FlowyResult<()> {
        if self.memory_cache.contains(&revision.rev_id) {
            // Duplicates are ignored rather than treated as errors.
            tracing::warn!(
                "Duplicate revision: {}:{}-{:?}",
                self.object_id,
                revision.rev_id,
                state
            );
            return Ok(());
        }
        let record = SyncRecord {
            revision,
            state,
            write_to_disk,
        };
        self.memory_cache.add(Cow::Owned(record)).await;
        Ok(())
    }

    /// Replaces every revision inside `range` — both in memory and on disk —
    /// with the single merged `new_revision`.
    async fn compact(&self, range: &RevisionRange, new_revision: Revision) -> FlowyResult<()> {
        self.memory_cache.remove_with_range(range);
        let rev_ids = range.to_rev_ids();
        self
            .disk_cache
            .delete_revision_records(&self.object_id, Some(rev_ids))?;
        self.add(new_revision, RevisionState::Sync, true).await?;
        Ok(())
    }

    /// Looks up a single record by rev_id, falling back to the disk cache when
    /// it is not in memory. Returns `None` on a disk read error (error is logged).
    pub async fn get(&self, rev_id: i64) -> Option<SyncRecord> {
        match self.memory_cache.get(&rev_id).await {
            None => match self
                .disk_cache
                .read_revision_records(&self.object_id, Some(vec![rev_id]))
            {
                Ok(mut records) => {
                    let record = records.pop()?;
                    // A single rev_id must map to at most one record.
                    assert!(records.is_empty());
                    Some(record)
                },
                Err(e) => {
                    tracing::error!("{}", e);
                    None
                },
            },
            Some(revision) => Some(revision),
        }
    }

    /// Loads every record for `object_id` from disk, keeping only the first
    /// occurrence of each rev_id (disk order is preserved).
    pub fn load_all_records(&self, object_id: &str) -> FlowyResult<Vec<SyncRecord>> {
        // NOTE(review): a HashSet would express the dedup intent more directly
        // than this HashMap whose key and value are the same rev_id.
        let mut record_ids = HashMap::new();
        let mut records = vec![];
        for record in self.disk_cache.read_revision_records(object_id, None)? {
            let rev_id = record.revision.rev_id;
            if record_ids.get(&rev_id).is_none() {
                records.push(record);
            }
            record_ids.insert(rev_id, rev_id);
        }
        Ok(records)
    }

    // Read the revision which rev_id >= range.start && rev_id <= range.end
    pub async fn revisions_in_range(&self, range: &RevisionRange) -> FlowyResult<Vec<Revision>> {
        let range = range.clone();
        let mut records = self.memory_cache.get_with_range(&range).await?;
        let range_len = range.len() as usize;
        if records.len() != range_len {
            // Memory cache is incomplete: fall back to the disk read on a
            // blocking-capable thread so the async executor is not stalled.
            let disk_cache = self.disk_cache.clone();
            let object_id = self.object_id.clone();
            records =
                spawn_blocking(move || disk_cache.read_revision_records_with_range(&object_id, &range))
                    .await
                    .map_err(internal_error)??;
            if records.len() != range_len {
                // A mismatch is logged but not treated as a hard error.
                tracing::error!(
                    "Expect revision len {},but receive {}",
                    range_len,
                    records.len()
                );
            }
        }
        Ok(
            records
                .into_iter()
                .map(|record| record.revision)
                .collect::<Vec<Revision>>(),
        )
    }

    /// Deletes every revision record inside `range` from the disk cache only;
    /// the memory cache and the sync sequence are untouched.
    pub fn delete_revisions_from_range(&self, range: RevisionRange) -> FlowyResult<()> {
        self
            .disk_cache
            .delete_revision_records(&self.object_id, Some(range.to_rev_ids()))?;
        Ok(())
    }
}
  336. impl<C> RevisionMemoryCacheDelegate for Arc<dyn RevisionDiskCache<C, Error = FlowyError>> {
  337. fn send_sync(&self, mut records: Vec<SyncRecord>) -> FlowyResult<()> {
  338. records.retain(|record| record.write_to_disk);
  339. if !records.is_empty() {
  340. tracing::Span::current().record(
  341. "checkpoint_result",
  342. format!("{} records were saved", records.len()).as_str(),
  343. );
  344. self.create_revision_records(records)?;
  345. }
  346. Ok(())
  347. }
  348. fn receive_ack(&self, object_id: &str, rev_id: i64) {
  349. let changeset = RevisionChangeset {
  350. object_id: object_id.to_string(),
  351. rev_id,
  352. state: RevisionState::Ack,
  353. };
  354. match self.update_revision_record(vec![changeset]) {
  355. Ok(_) => {},
  356. Err(e) => tracing::error!("{}", e),
  357. }
  358. }
  359. }
/// FIFO bookkeeping for locally-produced revisions awaiting server
/// acknowledgement, plus the state needed to merge a trailing run of them
/// into a single revision.
#[derive(Default)]
struct DeferSyncSequence {
    // rev_ids awaiting sync; `recv` enforces strictly ascending order.
    rev_ids: VecDeque<i64>,
    // Index into `rev_ids` of the first revision marked as mergeable.
    // NOTE(review): this index is not shifted down when `ack` pops the front
    // of `rev_ids`, so after an ack it may no longer point at the element it
    // was recorded for — confirm the intended semantics.
    merge_start: Option<usize>,
    // Count of queued revisions currently marked as mergeable.
    merge_length: usize,
}
impl DeferSyncSequence {
    fn new() -> Self {
        DeferSyncSequence::default()
    }

    /// Pushes the new_rev_id to the end of the list and marks it as mergeable.
    fn merge_recv(&mut self, new_rev_id: i64) -> FlowyResult<()> {
        self.recv(new_rev_id)?;
        self.merge_length += 1;
        // Remember where the mergeable run begins: the element just pushed.
        if self.merge_start.is_none() && !self.rev_ids.is_empty() {
            self.merge_start = Some(self.rev_ids.len() - 1);
        }
        Ok(())
    }

    /// Pushes the new_rev_id to the end of the list. An out-of-order id is
    /// logged and silently dropped — the call still returns `Ok`.
    fn recv(&mut self, new_rev_id: i64) -> FlowyResult<()> {
        // The last revision's rev_id must be greater than the new one.
        if let Some(rev_id) = self.rev_ids.back() {
            if *rev_id >= new_rev_id {
                tracing::error!("The new revision's id must be greater than {}", rev_id);
                return Ok(());
            }
        }
        self.rev_ids.push_back(new_rev_id);
        Ok(())
    }

    /// Removes `rev_id` from the front of the list. Errors when `rev_id` is
    /// not the revision currently being synced (i.e. not at the front).
    fn ack(&mut self, rev_id: &i64) -> FlowyResult<()> {
        let cur_rev_id = self.rev_ids.front().cloned();
        if let Some(pop_rev_id) = cur_rev_id {
            if &pop_rev_id != rev_id {
                let desc = format!(
                    "The ack rev_id:{} is not equal to the current rev_id:{}",
                    rev_id, pop_rev_id
                );
                return Err(FlowyError::internal().context(desc));
            }
            // If a mergeable run exists, check whether the popped front element
            // belongs to it. Since `rev_ids` is strictly ascending, the rev_id
            // at `merge_start` can only be <= the front when `merge_start` is 0.
            let mut compact_rev_id = None;
            if let Some(compact_index) = self.merge_start {
                compact_rev_id = self.rev_ids.get(compact_index).cloned();
            }
            let pop_rev_id = self.rev_ids.pop_front();
            if let (Some(compact_rev_id), Some(pop_rev_id)) = (compact_rev_id, pop_rev_id) {
                if compact_rev_id <= pop_rev_id && self.merge_length > 0 {
                    self.merge_length -= 1;
                }
            }
        }
        Ok(())
    }

    /// rev_id of the revision currently being synced (front of the queue).
    fn next_rev_id(&self) -> Option<i64> {
        self.rev_ids.front().cloned()
    }

    /// Drops all queued rev_ids and resets the merge bookkeeping.
    fn clear(&mut self) {
        self.merge_start = None;
        self.merge_length = 0;
        self.rev_ids.clear();
    }

    /// Pops and returns every rev_id from `merge_start` to the end of the
    /// queue (the mergeable run), leaving earlier — currently synchronizing —
    /// rev_ids in place, and resets the merge bookkeeping.
    fn pop_merge_rev_ids(&mut self) -> VecDeque<i64> {
        let mut compact_seq = VecDeque::with_capacity(self.rev_ids.len());
        if let Some(start) = self.merge_start {
            if start < self.rev_ids.len() {
                let seq = self.rev_ids.split_off(start);
                compact_seq.extend(seq);
            }
        }
        self.merge_start = None;
        self.merge_length = 0;
        compact_seq
    }
}
  437. }