// rev_persistence.rs — persistence of revisions backed by an in-memory cache
// write-through to a disk cache, plus a deferred sync sequence for local edits.
  1. use crate::cache::{
  2. disk::{RevisionChangeset, RevisionDiskCache},
  3. memory::RevisionMemoryCacheDelegate,
  4. };
  5. use crate::disk::{RevisionState, SyncRecord};
  6. use crate::memory::RevisionMemoryCache;
  7. use crate::RevisionMergeable;
  8. use flowy_error::{internal_error, FlowyError, FlowyResult};
  9. use flowy_http_model::revision::{Revision, RevisionRange};
  10. use std::collections::{HashMap, VecDeque};
  11. use std::{borrow::Cow, sync::Arc};
  12. use tokio::sync::RwLock;
  13. use tokio::task::spawn_blocking;
// Debounce interval (ms) for flushing revisions to disk.
// NOTE(review): not referenced in this chunk — consumers presumably live elsewhere; confirm.
pub const REVISION_WRITE_INTERVAL_IN_MILLIS: u64 = 600;
  15. #[derive(Clone)]
  16. pub struct RevisionPersistenceConfiguration {
  17. // If the number of revisions that didn't sync to the server greater than the merge_threshold
  18. // then these revisions will be merged into one revision.
  19. merge_threshold: usize,
  20. /// Indicates that the revisions that didn't sync to the server can be merged into one when
  21. /// `compact_lagging_revisions` get called.
  22. merge_lagging: bool,
  23. }
  24. impl RevisionPersistenceConfiguration {
  25. pub fn new(merge_threshold: usize, merge_lagging: bool) -> Self {
  26. debug_assert!(merge_threshold > 1);
  27. if merge_threshold > 1 {
  28. Self {
  29. merge_threshold,
  30. merge_lagging,
  31. }
  32. } else {
  33. Self {
  34. merge_threshold: 100,
  35. merge_lagging,
  36. }
  37. }
  38. }
  39. }
  40. impl std::default::Default for RevisionPersistenceConfiguration {
  41. fn default() -> Self {
  42. Self {
  43. merge_threshold: 100,
  44. merge_lagging: false,
  45. }
  46. }
  47. }
/// Represents as the persistence of revisions including memory or disk cache.
/// The generic parameter, `Connection`, represents as the disk backend's connection.
/// If the backend is SQLite, then the Connect will be SQLiteConnect.
pub struct RevisionPersistence<Connection> {
    // Owner of the revisions; forwarded to the merge logic.
    user_id: String,
    // Id of the object (document/grid/...) these revisions belong to.
    object_id: String,
    // Durable store for revision records.
    disk_cache: Arc<dyn RevisionDiskCache<Connection, Error = FlowyError>>,
    // In-memory cache constructed with `disk_cache` as its flush delegate.
    memory_cache: Arc<RevisionMemoryCache>,
    // Queue of local revision ids awaiting sync to the server.
    sync_seq: RwLock<DeferSyncSequence>,
    configuration: RevisionPersistenceConfiguration,
}
  59. impl<Connection> RevisionPersistence<Connection>
  60. where
  61. Connection: 'static,
  62. {
  63. pub fn new<C>(
  64. user_id: &str,
  65. object_id: &str,
  66. disk_cache: C,
  67. configuration: RevisionPersistenceConfiguration,
  68. ) -> RevisionPersistence<Connection>
  69. where
  70. C: 'static + RevisionDiskCache<Connection, Error = FlowyError>,
  71. {
  72. let disk_cache = Arc::new(disk_cache) as Arc<dyn RevisionDiskCache<Connection, Error = FlowyError>>;
  73. Self::from_disk_cache(user_id, object_id, disk_cache, configuration)
  74. }
  75. pub fn from_disk_cache(
  76. user_id: &str,
  77. object_id: &str,
  78. disk_cache: Arc<dyn RevisionDiskCache<Connection, Error = FlowyError>>,
  79. configuration: RevisionPersistenceConfiguration,
  80. ) -> RevisionPersistence<Connection> {
  81. let object_id = object_id.to_owned();
  82. let user_id = user_id.to_owned();
  83. let sync_seq = RwLock::new(DeferSyncSequence::new());
  84. let memory_cache = Arc::new(RevisionMemoryCache::new(&object_id, Arc::new(disk_cache.clone())));
  85. Self {
  86. user_id,
  87. object_id,
  88. disk_cache,
  89. memory_cache,
  90. sync_seq,
  91. configuration,
  92. }
  93. }
    /// Save the revision that comes from remote to disk.
    #[tracing::instrument(level = "trace", skip(self, revision), fields(rev_id, object_id=%self.object_id), err)]
    pub(crate) async fn add_ack_revision(&self, revision: &Revision) -> FlowyResult<()> {
        tracing::Span::current().record("rev_id", &revision.rev_id);
        // Remote revisions are already acknowledged: store them in the Ack state
        // and allow them to be written through to disk.
        self.add(revision.clone(), RevisionState::Ack, true).await
    }
    /// Merges all lagging (queued-but-unsynced, mergeable) revisions into a single
    /// revision on disk. No-op unless `merge_lagging` is enabled in the configuration.
    #[tracing::instrument(level = "trace", skip_all, err)]
    pub async fn compact_lagging_revisions<'a>(
        &'a self,
        rev_compress: &Arc<dyn RevisionMergeable + 'a>,
    ) -> FlowyResult<()> {
        if !self.configuration.merge_lagging {
            return Ok(());
        }
        let mut sync_seq = self.sync_seq.write().await;
        // Drains the mergeable run of ids; empty when nothing was marked mergeable.
        let compact_seq = sync_seq.compact();
        if !compact_seq.is_empty() {
            // front()/back() unwraps are safe: the sequence is non-empty here.
            let range = RevisionRange {
                start: *compact_seq.front().unwrap(),
                end: *compact_seq.back().unwrap(),
            };
            let revisions = self.revisions_in_range(&range).await?;
            let rev_ids = range.to_rev_ids();
            debug_assert_eq!(range.len() as usize, revisions.len());
            // compact multiple revisions into one
            let merged_revision = rev_compress.merge_revisions(&self.user_id, &self.object_id, revisions)?;
            // NOTE(review): "rev_id" is not declared in this span's `fields(...)`,
            // so this record call is likely a no-op — confirm.
            tracing::Span::current().record("rev_id", &merged_revision.rev_id);
            let record = SyncRecord {
                revision: merged_revision,
                state: RevisionState::Sync,
                write_to_disk: true,
            };
            // Atomically replaces the merged range on disk with the single record.
            let _ = self
                .disk_cache
                .delete_and_insert_records(&self.object_id, Some(rev_ids), vec![record])?;
        }
        Ok(())
    }
    /// Sync the each records' revisions to remote if its state is `RevisionState::Sync`.
    ///
    pub(crate) async fn sync_revision_records(&self, records: &[SyncRecord]) -> FlowyResult<()> {
        let mut sync_seq = self.sync_seq.write().await;
        for record in records {
            if record.state == RevisionState::Sync {
                // Re-add to the in-memory cache without forcing a disk write (the
                // record presumably originates from disk), then queue its id for syncing.
                self.add(record.revision.clone(), RevisionState::Sync, false).await?;
                sync_seq.recv(record.revision.rev_id)?; // Sync the records if their state is RevisionState::Sync.
            }
        }
        Ok(())
    }
  144. /// Save the revision to disk and append it to the end of the sync sequence.
  145. /// The returned value,rev_id, will be different with the passed-in revision's rev_id if
  146. /// multiple revisions are merged into one.
  147. #[tracing::instrument(level = "trace", skip_all, fields(rev_id, compact_range, object_id=%self.object_id), err)]
  148. pub(crate) async fn add_local_revision<'a>(
  149. &'a self,
  150. new_revision: Revision,
  151. rev_compress: &Arc<dyn RevisionMergeable + 'a>,
  152. ) -> FlowyResult<i64> {
  153. let mut sync_seq = self.sync_seq.write().await;
  154. // Before the new_revision is pushed into the sync_seq, we check if the current `compact_length` of the
  155. // sync_seq is less equal to or greater than the merge threshold. If yes, it's needs to merged
  156. // with the new_revision into one revision.
  157. let mut compact_seq = VecDeque::default();
  158. // tracing::info!("{}", compact_seq)
  159. if sync_seq.compact_length >= self.configuration.merge_threshold - 1 {
  160. compact_seq.extend(sync_seq.compact());
  161. }
  162. if !compact_seq.is_empty() {
  163. let range = RevisionRange {
  164. start: *compact_seq.front().unwrap(),
  165. end: *compact_seq.back().unwrap(),
  166. };
  167. tracing::Span::current().record("compact_range", &format!("{}", range).as_str());
  168. let mut revisions = self.revisions_in_range(&range).await?;
  169. debug_assert_eq!(range.len() as usize, revisions.len());
  170. // append the new revision
  171. revisions.push(new_revision);
  172. // compact multiple revisions into one
  173. let merged_revision = rev_compress.merge_revisions(&self.user_id, &self.object_id, revisions)?;
  174. let rev_id = merged_revision.rev_id;
  175. tracing::Span::current().record("rev_id", &merged_revision.rev_id);
  176. let _ = sync_seq.recv(merged_revision.rev_id)?;
  177. // replace the revisions in range with compact revision
  178. self.compact(&range, merged_revision).await?;
  179. Ok(rev_id)
  180. } else {
  181. let rev_id = new_revision.rev_id;
  182. tracing::Span::current().record("rev_id", &rev_id);
  183. self.add(new_revision, RevisionState::Sync, true).await?;
  184. sync_seq.merge_recv(rev_id)?;
  185. Ok(rev_id)
  186. }
  187. }
    /// Remove the revision with rev_id from the sync sequence.
    pub(crate) async fn ack_revision(&self, rev_id: i64) -> FlowyResult<()> {
        // Only ack the memory cache when the sequence accepted the ack; a mismatched
        // id (not the head of the queue) is swallowed rather than propagated.
        if self.sync_seq.write().await.ack(&rev_id).is_ok() {
            self.memory_cache.ack(&rev_id).await;
        }
        Ok(())
    }
  195. pub(crate) async fn next_sync_revision(&self) -> FlowyResult<Option<Revision>> {
  196. match self.sync_seq.read().await.next_rev_id() {
  197. None => Ok(None),
  198. Some(rev_id) => Ok(self.get(rev_id).await.map(|record| record.revision)),
  199. }
  200. }
    /// Peeks the id of the next revision waiting to be synced, if any.
    pub(crate) async fn next_sync_rev_id(&self) -> Option<i64> {
        self.sync_seq.read().await.next_rev_id()
    }
    /// Number of sync records currently tracked by the in-memory cache.
    pub(crate) fn number_of_sync_records(&self) -> usize {
        self.memory_cache.number_of_sync_records()
    }
  207. pub(crate) fn number_of_records_in_disk(&self) -> usize {
  208. match self.disk_cache.read_revision_records(&self.object_id, None) {
  209. Ok(records) => records.len(),
  210. Err(e) => {
  211. tracing::error!("Read revision records failed: {:?}", e);
  212. 0
  213. }
  214. }
  215. }
  216. /// The cache gets reset while it conflicts with the remote revisions.
  217. #[tracing::instrument(level = "trace", skip(self, revisions), err)]
  218. pub(crate) async fn reset(&self, revisions: Vec<Revision>) -> FlowyResult<()> {
  219. let records = revisions
  220. .into_iter()
  221. .map(|revision| SyncRecord {
  222. revision,
  223. state: RevisionState::Sync,
  224. write_to_disk: false,
  225. })
  226. .collect::<Vec<_>>();
  227. let _ = self
  228. .disk_cache
  229. .delete_and_insert_records(&self.object_id, None, records.clone())?;
  230. let _ = self.memory_cache.reset_with_revisions(records).await;
  231. self.sync_seq.write().await.clear();
  232. Ok(())
  233. }
  234. async fn add(&self, revision: Revision, state: RevisionState, write_to_disk: bool) -> FlowyResult<()> {
  235. if self.memory_cache.contains(&revision.rev_id) {
  236. tracing::warn!("Duplicate revision: {}:{}-{:?}", self.object_id, revision.rev_id, state);
  237. return Ok(());
  238. }
  239. let record = SyncRecord {
  240. revision,
  241. state,
  242. write_to_disk,
  243. };
  244. self.memory_cache.add(Cow::Owned(record)).await;
  245. Ok(())
  246. }
    /// Replaces every revision inside `range` (in memory and on disk) with the
    /// single merged `new_revision`, stored in the Sync state.
    async fn compact(&self, range: &RevisionRange, new_revision: Revision) -> FlowyResult<()> {
        self.memory_cache.remove_with_range(range);
        let rev_ids = range.to_rev_ids();
        let _ = self
            .disk_cache
            .delete_revision_records(&self.object_id, Some(rev_ids))?;
        self.add(new_revision, RevisionState::Sync, true).await?;
        Ok(())
    }
  256. pub async fn get(&self, rev_id: i64) -> Option<SyncRecord> {
  257. match self.memory_cache.get(&rev_id).await {
  258. None => match self
  259. .disk_cache
  260. .read_revision_records(&self.object_id, Some(vec![rev_id]))
  261. {
  262. Ok(mut records) => {
  263. let record = records.pop()?;
  264. assert!(records.is_empty());
  265. Some(record)
  266. }
  267. Err(e) => {
  268. tracing::error!("{}", e);
  269. None
  270. }
  271. },
  272. Some(revision) => Some(revision),
  273. }
  274. }
  275. pub fn load_all_records(&self, object_id: &str) -> FlowyResult<Vec<SyncRecord>> {
  276. let mut record_ids = HashMap::new();
  277. let mut records = vec![];
  278. for record in self.disk_cache.read_revision_records(object_id, None)? {
  279. let rev_id = record.revision.rev_id;
  280. if record_ids.get(&rev_id).is_none() {
  281. records.push(record);
  282. }
  283. record_ids.insert(rev_id, rev_id);
  284. }
  285. Ok(records)
  286. }
    // Read the revision which rev_id >= range.start && rev_id <= range.end
    pub async fn revisions_in_range(&self, range: &RevisionRange) -> FlowyResult<Vec<Revision>> {
        let range = range.clone();
        let mut records = self.memory_cache.get_with_range(&range).await?;
        let range_len = range.len() as usize;
        // The memory cache may only hold part of the range; in that case re-read
        // the whole range from disk on a blocking thread (the disk read is synchronous).
        if records.len() != range_len {
            let disk_cache = self.disk_cache.clone();
            let object_id = self.object_id.clone();
            records = spawn_blocking(move || disk_cache.read_revision_records_with_range(&object_id, &range))
                .await
                .map_err(internal_error)??;
            // Still short after the disk read: log the discrepancy but return what we have.
            if records.len() != range_len {
                tracing::error!("Expect revision len {},but receive {}", range_len, records.len());
            }
        }
        Ok(records
            .into_iter()
            .map(|record| record.revision)
            .collect::<Vec<Revision>>())
    }
    /// Deletes the disk records for every rev_id in `range`.
    /// NOTE(review): the memory cache is not touched here — confirm intended.
    #[allow(dead_code)]
    pub fn delete_revisions_from_range(&self, range: RevisionRange) -> FlowyResult<()> {
        let _ = self
            .disk_cache
            .delete_revision_records(&self.object_id, Some(range.to_rev_ids()))?;
        Ok(())
    }
  314. }
/// Lets the disk cache serve as the flush/ack target for `RevisionMemoryCache`.
impl<C> RevisionMemoryCacheDelegate for Arc<dyn RevisionDiskCache<C, Error = FlowyError>> {
    /// Persists checkpointed records, skipping those flagged memory-only.
    fn send_sync(&self, mut records: Vec<SyncRecord>) -> FlowyResult<()> {
        records.retain(|record| record.write_to_disk);
        if !records.is_empty() {
            // NOTE(review): "checkpoint_result" must be declared in the active span's
            // fields for this record call to take effect — confirm at call sites.
            tracing::Span::current().record(
                "checkpoint_result",
                &format!("{} records were saved", records.len()).as_str(),
            );
            let _ = self.create_revision_records(records)?;
        }
        Ok(())
    }
    /// Marks the record as acknowledged on disk; failures are logged, not propagated.
    fn receive_ack(&self, object_id: &str, rev_id: i64) {
        let changeset = RevisionChangeset {
            object_id: object_id.to_string(),
            rev_id: rev_id.into(),
            state: RevisionState::Ack,
        };
        match self.update_revision_record(vec![changeset]) {
            Ok(_) => {}
            Err(e) => tracing::error!("{}", e),
        }
    }
}
/// FIFO queue of local revision ids awaiting sync, with bookkeeping for the
/// run of ids at the tail that may be merged ("compacted") into one revision.
#[derive(Default)]
struct DeferSyncSequence {
    // All queued rev_ids, oldest at the front.
    rev_ids: VecDeque<i64>,
    // Index into `rev_ids` where the mergeable run starts; None when nothing is mergeable.
    compact_index: Option<usize>,
    // Number of mergeable revisions currently queued.
    compact_length: usize,
}
  345. impl DeferSyncSequence {
  346. fn new() -> Self {
  347. DeferSyncSequence::default()
  348. }
  349. /// Pushes the new_rev_id to the end of the list and marks this new_rev_id is mergeable.
  350. ///
  351. /// When calling `compact` method, it will return a list of revision ids started from
  352. /// the `compact_start_pos`, and ends with the `compact_length`.
  353. fn merge_recv(&mut self, new_rev_id: i64) -> FlowyResult<()> {
  354. let _ = self.recv(new_rev_id)?;
  355. self.compact_length += 1;
  356. if self.compact_index.is_none() && !self.rev_ids.is_empty() {
  357. self.compact_index = Some(self.rev_ids.len() - 1);
  358. }
  359. Ok(())
  360. }
    /// Pushes the new_rev_id to the end of the list.
    fn recv(&mut self, new_rev_id: i64) -> FlowyResult<()> {
        // The last revision's rev_id must be greater than the new one.
        if let Some(rev_id) = self.rev_ids.back() {
            if *rev_id >= new_rev_id {
                // NOTE(review): a non-increasing id is logged and silently dropped —
                // the caller still receives Ok(()). Confirm this is intended.
                tracing::error!("The new revision's id must be greater than {}", rev_id);
                return Ok(());
            }
        }
        self.rev_ids.push_back(new_rev_id);
        Ok(())
    }
    /// Removes the rev_id from the list
    fn ack(&mut self, rev_id: &i64) -> FlowyResult<()> {
        // Acks must arrive in order: only the id at the front of the queue may be acked.
        let cur_rev_id = self.rev_ids.front().cloned();
        if let Some(pop_rev_id) = cur_rev_id {
            if &pop_rev_id != rev_id {
                let desc = format!(
                    "The ack rev_id:{} is not equal to the current rev_id:{}",
                    rev_id, pop_rev_id
                );
                return Err(FlowyError::internal().context(desc));
            }
            // Capture the id at the start of the mergeable run (if any) before popping.
            let mut compact_rev_id = None;
            if let Some(compact_index) = self.compact_index {
                compact_rev_id = self.rev_ids.get(compact_index).cloned();
            }
            let pop_rev_id = self.rev_ids.pop_front();
            // If the popped id is at or past the start of the mergeable run, one
            // mergeable revision left the queue, so shrink the run length.
            // NOTE(review): `compact_index` itself is not shifted after pop_front,
            // so it drifts relative to `rev_ids` — confirm whether this is a bug.
            if let (Some(compact_rev_id), Some(pop_rev_id)) = (compact_rev_id, pop_rev_id) {
                if compact_rev_id <= pop_rev_id && self.compact_length > 0 {
                    self.compact_length -= 1;
                }
            }
        }
        Ok(())
    }
  397. fn next_rev_id(&self) -> Option<i64> {
  398. self.rev_ids.front().cloned()
  399. }
  400. fn clear(&mut self) {
  401. self.compact_index = None;
  402. self.compact_length = 0;
  403. self.rev_ids.clear();
  404. }
    // Compact the rev_ids into one except the current synchronizing rev_id.
    fn compact(&mut self) -> VecDeque<i64> {
        let mut compact_seq = VecDeque::with_capacity(self.rev_ids.len());
        if let Some(start) = self.compact_index {
            // split_off leaves [0, start) in place (ids already being synced) and
            // hands back [start, len) — the mergeable run — which we return.
            if start < self.rev_ids.len() {
                let seq = self.rev_ids.split_off(start);
                compact_seq.extend(seq);
            }
        }
        // Merge bookkeeping resets whether or not anything was drained.
        self.compact_index = None;
        self.compact_length = 0;
        compact_seq
    }
  418. }