Elastic, geo-distributed RAFT

From Publication
   % Conference paper record (IWQoS '19 proceedings, ACM). The acronym RAFT is
   % brace-protected in the title so sentence-casing styles do not lowercase it.
   @inproceedings{10.1145/3326285.3329046,
       author    = {Xu, Zichen and Stewart, Christopher and Huang, Jiacheng},
       title     = {Elastic, Geo-Distributed {RAFT}},
       year      = {2019},
       isbn      = {9781450367783},
       publisher = {Association for Computing Machinery},
       address   = {New York, NY, USA},
       url       = {https://doi.org/10.1145/3326285.3329046},
       doi       = {10.1145/3326285.3329046},
       abstract  = {Raft is a protocol to maintain strong consistency across data replicas in cloud. It is widely used, especially by workloads that span geographically distributed sites. As these workloads grow, Raft's costs should grow, as least proportionally. However, auto scaling approaches for Raft inflate costs by provisioning at all sites when one site exhausts its local resources. This paper presents Geo-Raft, a scale-out mechanism that enables precise auto scaling for Raft. Geo-Raft extends Raft with the following abstractions: (1) secretaries which takes log processing for the leader and (2) observers which process read requests for followers. These abstractions are stateless, allowing for elastic auto scaling, even on unreliable spot instances. Geo-Raft provably preserves strong consistency guarantees provided by Raft. We implemented and evaluated Geo-Raft with multiple auto scaling techniques on Amazon EC2. Geo-Raft scales in resource footprint increments 5-7X smaller than Multi-Raft, the state of the art. Using spot instances, Geo-Raft reduces costs by 84.5\% compared to Multi-Raft. Geo-Raft improves goodput of 95th-percentile SLO by 9X. Geo-Raft operates key-value services for 6 months without losing data or crash.},
       booktitle = {Proceedings of the International Symposium on Quality of Service},
       articleno = {11},
       numpages  = {9},
       location  = {Phoenix, Arizona},
       series    = {IWQoS '19}
   }