diff --git a/.gitignore b/.gitignore index c04d8f9..9e2320e 100644 --- a/.gitignore +++ b/.gitignore @@ -3,26 +3,26 @@ *.o build/* vgcore.* -whitepaper/main.aux -whitepaper/main.glo -whitepaper/main.idx -whitepaper/main.ist -whitepaper/main.ilg -whitepaper/main.ind -whitepaper/main.lof -whitepaper/main.bbl -whitepaper/main.bcf -whitepaper/main.blj -whitepaper/main.lot -whitepaper/main.log -whitepaper/main.blg -whitepaper/main.ptc -whitepaper/structure.idx -whitepaper/structure.log -whitepaper/texput.log -whitepaper/main.run.xml -whitepaper/main.toc -whitepaper/main.synctex.gz -whitepaper/main.pdf -whitepaper/main.dvi -whitepaper/main.out.ps +whitepaper/*.aux +whitepaper/*.glo +whitepaper/*.idx +whitepaper/*.ist +whitepaper/*.ilg +whitepaper/*.ind +whitepaper/*.lof +whitepaper/*.bbl +whitepaper/*.bcf +whitepaper/*.blj +whitepaper/*.lot +whitepaper/*.log +whitepaper/*.blg +whitepaper/*.ptc +whitepaper/*.idx +whitepaper/*.log +whitepaper/*.run.xml +whitepaper/*.toc +whitepaper/*.synctex.gz +whitepaper/*.pdf +whitepaper/*.dvi +whitepaper/*.out.ps +whitepaper/*.aux diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..2b552f1 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,17 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. 
+ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Debug", + "type": "gdb", + "request": "launch", + "target": "./bin/tests", + "cwd": "${workspaceRoot}", + "valuesFormatting": "parseText", + "env": {"LLVM_PROFILE_FILE":"./bin/tests.profraw"} + } + ] +} \ No newline at end of file diff --git a/V0/include/endian.hpp b/V0/include/endian.hpp index 39e5330..0119282 100644 --- a/V0/include/endian.hpp +++ b/V0/include/endian.hpp @@ -68,13 +68,14 @@ namespace bitops{ internal = swap_if_little(value); } - constexpr void operator+=(const int& v) + template + constexpr void operator+=(const Ta& v) { - internal = swap_if_little(T(*this)+v); + internal = swap_if_little(T(*this)+(T)v); } constexpr operator T() const { return swap_if_little(internal); } }; -} \ No newline at end of file +} diff --git a/V1/src/izaro-storage.cpp b/V1/src/izaro-storage.cpp index e69de29..9c5f229 100644 --- a/V1/src/izaro-storage.cpp +++ b/V1/src/izaro-storage.cpp @@ -0,0 +1,5 @@ + +std::vector args; +std::foreach(argv, argv+argc, [&](const char* v){ + args.push_back(std::string_view(v)); +}); diff --git a/gplib b/gplib index 2cbc1cc..a28c7d1 160000 --- a/gplib +++ b/gplib @@ -1 +1 @@ -Subproject commit 2cbc1cc134a7273f0048f4074101f13b0512f6bf +Subproject commit a28c7d178b7e98904f94a79c07d444bc605d3189 diff --git a/whitepaper/fr_survey_companies.tex b/whitepaper/fr_survey_companies.tex new file mode 100644 index 0000000..6327192 --- /dev/null +++ b/whitepaper/fr_survey_companies.tex @@ -0,0 +1,235 @@ +\documentclass[a4paper,10pt,BCOR10mm,oneside,headsepline]{scrartcl} +\usepackage[french]{babel} +\usepackage[utf8]{inputenc} +\usepackage{wasysym}% provides \ocircle and \Box +\usepackage{enumitem}% easy control of topsep and leftmargin for lists +\usepackage{color}% used for background color +\usepackage{forloop}% used for \Qrating and \Qlines +\usepackage{ifthen}% used for \Qitem and \QItem +\usepackage{typearea} 
+\areaset{17cm}{26cm} +\setlength{\topmargin}{-1cm} +\usepackage{scrpage2} +\pagestyle{scrheadings} +\ihead{Enquête sur la sécurité des données (informations sur l'entreprise)} +\ohead{\pagemark} +\chead{} +\cfoot{} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Beginning of questionnaire command definitions %% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% 2010, 2012 by Sven Hartenstein +%% mail@svenhartenstein.de +%% http://www.svenhartenstein.de +%% +%% Please be warned that this is NOT a full-featured framework for +%% creating (all sorts of) questionnaires. Rather, it is a small +%% collection of LaTeX commands that I found useful when creating a +%% questionnaire. Feel free to copy and adjust any parts you like. +%% Most probably, you will want to change the commands, so that they +%% fit your taste. +%% +%% Also note that I am not a LaTeX expert! Things can very likely be +%% done much more elegant than I was able to. If you have suggestions +%% about what can be improved please send me an email. I intend to +%% add good tipps to my website and to name contributers of course. +%% +%% 10/2012: Thanks to karathan for the suggestion to put \noindent +%% before \rule! + +%% \Qq = Questionaire question. Oh, this is just too simple. It helps +%% making it easy to globally change the appearance of questions. +\newcommand{\Qq}[1]{\textbf{#1}} + +%% \QO = Circle or box to be ticked. Used both by direct call and by +%% \Qrating and \Qlist. +\newcommand{\QO}{$\Box$}% or: $\ocircle$ + +%% \Qrating = Automatically create a rating scale with NUM steps, like +%% this: 0--0--0--0--0. +\newcounter{qr} +\newcommand{\Qrating}[1]{\QO\forloop{qr}{1}{\value{qr} < #1}{---\QO}} + +%% \Qline = Again, this is very simple. It helps setting the line +%% thickness globally. Used both by direct call and by \Qlines. +\newcommand{\Qline}[1]{\noindent\rule{#1}{0.6pt}} + +%% \Qlines = Insert NUM lines with width=\linewith. 
You can change the +%% \vskip value to adjust the spacing. +\newcounter{ql} +\newcommand{\Qlines}[1]{\forloop{ql}{0}{\value{ql}<#1}{\vskip0em\Qline{\linewidth}}} + +%% \Qlist = This is an environment very similar to itemize but with +%% \QO in front of each list item. Useful for classical multiple +%% choice. Change leftmargin and topsep accourding to your taste. +\newenvironment{Qlist}{% +\renewcommand{\labelitemi}{\QO} +\begin{itemize}[leftmargin=1.5em,topsep=-.5em] +}{% +\end{itemize} +} + +%% \Qtab = A "tabulator simulation". The first argument is the +%% distance from the left margin. The second argument is content which +%% is indented within the current row. +\newlength{\qt} +\newcommand{\Qtab}[2]{ +\setlength{\qt}{\linewidth} +\addtolength{\qt}{-#1} +\hfill\parbox[t]{\qt}{\raggedright #2} +} + +%% \Qitem = Item with automatic numbering. The first optional argument +%% can be used to create sub-items like 2a, 2b, 2c, ... The item +%% number is increased if the first argument is omitted or equals 'a'. +%% You will have to adjust this if you prefer a different numbering +%% scheme. Adjust topsep and leftmargin as needed. +\newcounter{itemnummer} +\newcommand{\Qitem}[2][]{% #1 optional, #2 notwendig +\ifthenelse{\equal{#1}{}}{\stepcounter{itemnummer}}{} +\ifthenelse{\equal{#1}{a}}{\stepcounter{itemnummer}}{} +\begin{enumerate}[topsep=2pt,leftmargin=2.8em] +\item[\textbf{\arabic{itemnummer}#1.}] #2 +\end{enumerate} +} + +%% \QItem = Like \Qitem but with alternating background color. This +%% might be error prone as I hard-coded some lengths (-5.25pt and +%% -3pt)! I do not yet understand why I need them. 
+\definecolor{bgodd}{rgb}{0.8,0.8,0.8} +\definecolor{bgeven}{rgb}{0.9,0.9,0.9} +\newcounter{itemoddeven} +\newlength{\gb} +\newcommand{\QItem}[2][]{% #1 optional, #2 notwendig +\setlength{\gb}{\linewidth} +\addtolength{\gb}{-5.25pt} +\ifthenelse{\equal{\value{itemoddeven}}{0}}{% +\noindent\colorbox{bgeven}{\hskip-3pt\begin{minipage}{\gb}\Qitem[#1]{#2}\end{minipage}}% +\stepcounter{itemoddeven}% +}{% +\noindent\colorbox{bgodd}{\hskip-3pt\begin{minipage}{\gb}\Qitem[#1]{#2}\end{minipage}}% +\setcounter{itemoddeven}{0}% +} +} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% End of questionnaire command definitions %% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\begin{document} + +\begin{center} +\textbf{\huge Enquête sur la sécurité des données} +\end{center}\vskip1em +\textbf{\Large Identifiant de l'entreprise \huge \Square \Square \Square \Square \Square \Square \Square \Square } + + + +\Qitem{ \Qq{Quel domaine représente le mieux votre entreprise~?} +\begin{Qlist} +\item Médical et Santé +\item Science et Technologie +\item Éducation et Enfance +\item Commerce, Négoce et Distribution +\item Bois, Papier, Carton et Imprimerie +\item Édition, Communication et Multimédia +\item Banque et Assurance +\item Mécanique, Équipement et Automobile +\item Agroalimentaire +\item Services aux entreprises, Études et Conseil +\item Textile et Habillement +\item Plastique et Caoutchouc +\item Transport et Logistique +\item Immobilier +\item autre ou refus de mentionner +\end{Qlist} +} + +\Qitem{ \Qq{Combien d'employés compte votre entreprise~?} +\begin{Qlist} +\item 1 à 5 +\item 6 à 15 +\item 16 à 50 +\item 51 à 150 +\item plus +\item refus de mentionner +\end{Qlist} +} + +\Qitem{ \Qq{Votre entreprise stocke-t-elle des informations personnelles ou sensibles de ses clients~?} + +\QO{} Oui \hskip0.5cm \QO{} Non \hskip0.5cm \QO{} Refus de mentionner } + + +\minisec{À quel degré votre entreprise est-elle concernée par~:} + +\Qitem[a]{ \Qq{La sécurité 
de ses informations (inquiétudes relatives au vol)} + +\Qtab{3cm}{Peu concernée \Qrating{5} +Très concernée}} + +\Qitem[b]{ \Qq{La sureté de ses informations (perte/destruction accidentelle)} + +\Qtab{3cm}{Peu concernée \Qrating{5} +Très concernée}} + + + +\Qitem{ \Qq{Combien de terminaux informatiques sont en activité dans votre entreprise~?} +\begin{Qlist} +\item aucun +\item 1 à 5 +\item 6 à 15 +\item 16 à 50 +\item 51 à 150 +\item plus +\item refus de mentionner +\end{Qlist} +} + +\pagebreak + +\Qitem{ \Qq{Combien de serveurs informatiques sont en activité dans votre entreprise~?} +\begin{Qlist} +\item aucun +\item 1 à 5 +\item 6 à 15 +\item 16 à 50 +\item 51 à 150 +\item plus +\item refus de mentionner +\end{Qlist} +} + + + +\Qitem{ \Qq{Votre entreprise utilise-t-elle des outils parmi cette liste~?} +\begin{Qlist} +\item Logiciels de chiffrements de disques +\item Matériel de chiffrement de disques +\item VPN (Réseaux privés virtuels) +\item Applications de messageries chiffrées (y compris emails chiffrés avec PGP) +\item Disques réseaux chiffrés +\item Logiciels de gestion de mots de passe +\end{Qlist} +} + + + +\Qitem{ \Qq{Si votre entreprise dispose de serveurs informatiques stockant des données sensibles, combien de personnes peuvent physiquement accéder à ces machines~?} +\begin{Qlist} +\item N/A +\item 1 à 5 +\item 6 à 15 +\item 16 à 50 +\item 51 à 150 +\item plus +\item refus de mentionner +\end{Qlist} +} + + + +\end{document} diff --git a/whitepaper/fr_survey_employees.tex b/whitepaper/fr_survey_employees.tex new file mode 100644 index 0000000..5e2d431 --- /dev/null +++ b/whitepaper/fr_survey_employees.tex @@ -0,0 +1,170 @@ +\documentclass[a4paper,10pt,BCOR10mm,oneside,headsepline]{scrartcl} +\usepackage[french]{babel} +\usepackage[utf8]{inputenc} +\usepackage{wasysym}% provides \ocircle and \Box +\usepackage{enumitem}% easy control of topsep and leftmargin for lists +\usepackage{color}% used for background color +\usepackage{forloop}% used for 
\Qrating and \Qlines +\usepackage{ifthen}% used for \Qitem and \QItem +\usepackage{typearea} +\areaset{17cm}{26cm} +\setlength{\topmargin}{-1cm} +\usepackage{scrpage2} +\pagestyle{scrheadings} +\ihead{Enquête sur la sécurité des données (fiche employé)} +\ohead{\pagemark} +\chead{} +\cfoot{} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Beginning of questionnaire command definitions %% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% 2010, 2012 by Sven Hartenstein +%% mail@svenhartenstein.de +%% http://www.svenhartenstein.de +%% +%% Please be warned that this is NOT a full-featured framework for +%% creating (all sorts of) questionnaires. Rather, it is a small +%% collection of LaTeX commands that I found useful when creating a +%% questionnaire. Feel free to copy and adjust any parts you like. +%% Most probably, you will want to change the commands, so that they +%% fit your taste. +%% +%% Also note that I am not a LaTeX expert! Things can very likely be +%% done much more elegant than I was able to. If you have suggestions +%% about what can be improved please send me an email. I intend to +%% add good tipps to my website and to name contributers of course. +%% +%% 10/2012: Thanks to karathan for the suggestion to put \noindent +%% before \rule! + +%% \Qq = Questionaire question. Oh, this is just too simple. It helps +%% making it easy to globally change the appearance of questions. +\newcommand{\Qq}[1]{\textbf{#1}} + +%% \QO = Circle or box to be ticked. Used both by direct call and by +%% \Qrating and \Qlist. +\newcommand{\QO}{$\Box$}% or: $\ocircle$ + +%% \Qrating = Automatically create a rating scale with NUM steps, like +%% this: 0--0--0--0--0. +\newcounter{qr} +\newcommand{\Qrating}[1]{\QO\forloop{qr}{1}{\value{qr} < #1}{---\QO}} + +%% \Qline = Again, this is very simple. It helps setting the line +%% thickness globally. Used both by direct call and by \Qlines. 
+\newcommand{\Qline}[1]{\noindent\rule{#1}{0.6pt}} + +%% \Qlines = Insert NUM lines with width=\linewith. You can change the +%% \vskip value to adjust the spacing. +\newcounter{ql} +\newcommand{\Qlines}[1]{\forloop{ql}{0}{\value{ql}<#1}{\vskip0em\Qline{\linewidth}}} + +%% \Qlist = This is an environment very similar to itemize but with +%% \QO in front of each list item. Useful for classical multiple +%% choice. Change leftmargin and topsep accourding to your taste. +\newenvironment{Qlist}{% +\renewcommand{\labelitemi}{\QO} +\begin{itemize}[leftmargin=1.5em,topsep=-.5em] +}{% +\end{itemize} +} + +%% \Qtab = A "tabulator simulation". The first argument is the +%% distance from the left margin. The second argument is content which +%% is indented within the current row. +\newlength{\qt} +\newcommand{\Qtab}[2]{ +\setlength{\qt}{\linewidth} +\addtolength{\qt}{-#1} +\hfill\parbox[t]{\qt}{\raggedright #2} +} + +%% \Qitem = Item with automatic numbering. The first optional argument +%% can be used to create sub-items like 2a, 2b, 2c, ... The item +%% number is increased if the first argument is omitted or equals 'a'. +%% You will have to adjust this if you prefer a different numbering +%% scheme. Adjust topsep and leftmargin as needed. +\newcounter{itemnummer} +\newcommand{\Qitem}[2][]{% #1 optional, #2 notwendig +\ifthenelse{\equal{#1}{}}{\stepcounter{itemnummer}}{} +\ifthenelse{\equal{#1}{a}}{\stepcounter{itemnummer}}{} +\begin{enumerate}[topsep=2pt,leftmargin=2.8em] +\item[\textbf{\arabic{itemnummer}#1.}] #2 +\end{enumerate} +} + +%% \QItem = Like \Qitem but with alternating background color. This +%% might be error prone as I hard-coded some lengths (-5.25pt and +%% -3pt)! I do not yet understand why I need them. 
+\definecolor{bgodd}{rgb}{0.8,0.8,0.8} +\definecolor{bgeven}{rgb}{0.9,0.9,0.9} +\newcounter{itemoddeven} +\newlength{\gb} +\newcommand{\QItem}[2][]{% #1 optional, #2 notwendig +\setlength{\gb}{\linewidth} +\addtolength{\gb}{-5.25pt} +\ifthenelse{\equal{\value{itemoddeven}}{0}}{% +\noindent\colorbox{bgeven}{\hskip-3pt\begin{minipage}{\gb}\Qitem[#1]{#2}\end{minipage}}% +\stepcounter{itemoddeven}% +}{% +\noindent\colorbox{bgodd}{\hskip-3pt\begin{minipage}{\gb}\Qitem[#1]{#2}\end{minipage}}% +\setcounter{itemoddeven}{0}% +} +} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% End of questionnaire command definitions %% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\begin{document} + +\begin{center} +\textbf{\huge Enquête sur la sécurité des données} +\end{center}\vskip1em +\textbf{\Large Identifiant de l'entreprise \huge \Square \Square \Square \Square \Square \Square \Square \Square } + + + +\Qitem{ \Qq{Pensez-vous que votre société collecte des informations personnelles ou sensibles de ses clients~?} + +\Qtab{3cm}{Aucune information \Qrating{5} +Beaucoup d'informations}} + +\Qitem{ \Qq{À quel point vous sentez-vous impliqué.e dans la protection des données clients~?} + +\Qtab{3cm}{Peu impliqué.e \Qrating{5} +Très impliqué.e}} + + +\Qitem{ \Qq{Quels facteurs garantissent la sécurité de votre système d'informations~?} +\begin{Qlist} +\item Restrictions de l'accès physique aux terminaux +\item Mot de passe partagé +\item Mot de passe personnel +\item Badge ou carte +\item Authentification biométrique +\item Clé physique +\item Destruction systématique des documents confidentiels +\end{Qlist} +} + +\Qitem{ \Qq{Les données gérées par votre entreprise sont-elles chiffrées~?} +\begin{Qlist} +\item oui +\item partiellement +\item non +\item je ne sais pas +\item refus de mentionner +\end{Qlist} +} + +\Qitem{ \Qq{Pensez-vous que les mesures de protection des données de votre entreprise sont suffisantes~?} + +\Qtab{3cm}{Insuffisantes 
\Qrating{5} +Plus que suffisantes}} + + + +\end{document} diff --git a/whitepaper/fr_survey_howto.tex b/whitepaper/fr_survey_howto.tex new file mode 100644 index 0000000..83f979c --- /dev/null +++ b/whitepaper/fr_survey_howto.tex @@ -0,0 +1,174 @@ +\documentclass[a4paper,10pt,BCOR10mm,oneside,headsepline]{scrartcl} +\usepackage[french]{babel} +\usepackage[utf8]{inputenc} +\usepackage{wasysym}% provides \ocircle and \Box +\usepackage{enumitem}% easy control of topsep and leftmargin for lists +\usepackage{color}% used for background color +\usepackage{forloop}% used for \Qrating and \Qlines +\usepackage{ifthen}% used for \Qitem and \QItem +\usepackage{typearea} +\usepackage{eurosym} +\usepackage{numprint} +\usepackage{bytefield} +\usepackage{siunitx} +\usepackage{placeins} +\usepackage{pgf-umlsd} +\usepackage{pgf-umlcd} +\usepackage{adjustbox} +\usepackage{multirow} +\usepackage{enumitem} +\usepackage{hhline} +\usepackage{pgfplots} +\usepackage{float} +\usepackage{fp} +\areaset{17cm}{26cm} +\setlength{\topmargin}{-1cm} +\usepackage{scrpage2} +\pagestyle{scrheadings} +\ihead{Enquête sur la sécurité des données (à propos)} +\ohead{\pagemark} +\chead{} +\cfoot{} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Beginning of questionnaire command definitions %% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% 2010, 2012 by Sven Hartenstein +%% mail@svenhartenstein.de +%% http://www.svenhartenstein.de +%% +%% Please be warned that this is NOT a full-featured framework for +%% creating (all sorts of) questionnaires. Rather, it is a small +%% collection of LaTeX commands that I found useful when creating a +%% questionnaire. Feel free to copy and adjust any parts you like. +%% Most probably, you will want to change the commands, so that they +%% fit your taste. +%% +%% Also note that I am not a LaTeX expert! Things can very likely be +%% done much more elegant than I was able to. 
If you have suggestions +%% about what can be improved please send me an email. I intend to +%% add good tipps to my website and to name contributers of course. +%% +%% 10/2012: Thanks to karathan for the suggestion to put \noindent +%% before \rule! + +%% \Qq = Questionaire question. Oh, this is just too simple. It helps +%% making it easy to globally change the appearance of questions. +\newcommand{\Qq}[1]{\textbf{#1}} + +%% \QO = Circle or box to be ticked. Used both by direct call and by +%% \Qrating and \Qlist. +\newcommand{\QO}{$\Box$}% or: $\ocircle$ + +%% \Qrating = Automatically create a rating scale with NUM steps, like +%% this: 0--0--0--0--0. +\newcounter{qr} +\newcommand{\Qrating}[1]{\QO\forloop{qr}{1}{\value{qr} < #1}{---\QO}} + +%% \Qline = Again, this is very simple. It helps setting the line +%% thickness globally. Used both by direct call and by \Qlines. +\newcommand{\Qline}[1]{\noindent\rule{#1}{0.6pt}} + +%% \Qlines = Insert NUM lines with width=\linewith. You can change the +%% \vskip value to adjust the spacing. +\newcounter{ql} +\newcommand{\Qlines}[1]{\forloop{ql}{0}{\value{ql}<#1}{\vskip0em\Qline{\linewidth}}} + +%% \Qlist = This is an environment very similar to itemize but with +%% \QO in front of each list item. Useful for classical multiple +%% choice. Change leftmargin and topsep accourding to your taste. +\newenvironment{Qlist}{% +\renewcommand{\labelitemi}{\QO} +\begin{itemize}[leftmargin=1.5em,topsep=-.5em] +}{% +\end{itemize} +} + +%% \Qtab = A "tabulator simulation". The first argument is the +%% distance from the left margin. The second argument is content which +%% is indented within the current row. +\newlength{\qt} +\newcommand{\Qtab}[2]{ +\setlength{\qt}{\linewidth} +\addtolength{\qt}{-#1} +\hfill\parbox[t]{\qt}{\raggedright #2} +} + +%% \Qitem = Item with automatic numbering. The first optional argument +%% can be used to create sub-items like 2a, 2b, 2c, ... 
The item +%% number is increased if the first argument is omitted or equals 'a'. +%% You will have to adjust this if you prefer a different numbering +%% scheme. Adjust topsep and leftmargin as needed. +\newcounter{itemnummer} +\newcommand{\Qitem}[2][]{% #1 optional, #2 notwendig +\ifthenelse{\equal{#1}{}}{\stepcounter{itemnummer}}{} +\ifthenelse{\equal{#1}{a}}{\stepcounter{itemnummer}}{} +\begin{enumerate}[topsep=2pt,leftmargin=2.8em] +\item[\textbf{\arabic{itemnummer}#1.}] #2 +\end{enumerate} +} + +%% \QItem = Like \Qitem but with alternating background color. This +%% might be error prone as I hard-coded some lengths (-5.25pt and +%% -3pt)! I do not yet understand why I need them. +\definecolor{bgodd}{rgb}{0.8,0.8,0.8} +\definecolor{bgeven}{rgb}{0.9,0.9,0.9} +\newcounter{itemoddeven} +\newlength{\gb} +\newcommand{\QItem}[2][]{% #1 optional, #2 notwendig +\setlength{\gb}{\linewidth} +\addtolength{\gb}{-5.25pt} +\ifthenelse{\equal{\value{itemoddeven}}{0}}{% +\noindent\colorbox{bgeven}{\hskip-3pt\begin{minipage}{\gb}\Qitem[#1]{#2}\end{minipage}}% +\stepcounter{itemoddeven}% +}{% +\noindent\colorbox{bgodd}{\hskip-3pt\begin{minipage}{\gb}\Qitem[#1]{#2}\end{minipage}}% +\setcounter{itemoddeven}{0}% +} +} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% End of questionnaire command definitions %% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\begin{document} + +\begin{center} +\textbf{\huge Enquête sur la sécurité des données} +\end{center}\vskip1em + +\section*{Pourquoi~?} + +Ce questionnaire a plusieurs objectifs~: +\begin{itemize} +\item mieux comprendre l'approche des utilisateurs et des entreprises face aux nouvelles technologies de protection des données +\item promouvoir le développement de nouvelles solutions de protection des données +\end{itemize} + +Ces données seront mises à disposition des internautes, anonymisées, sous la forme d'une base de données téléchargeable. 
+ +\section*{Qui~?} + +Cette recherche est effectuée par un petit groupe de développeurs et acteurs de l'informatique. La version française du questionnaire a été conçue par Ludovic Lagouardette. + +Les données seront accessibles publiquement à partir de février 2020 par n'importe quel internaute, ainsi qu'une liste des statistiques obtenues, à l'adresse suivante~: \texttt{https://archivist.nekoit.xyz/enquete1/ } + +\section*{Comment~?} + +Si vous répondez à une fiche employé ou entreprise, veillez à fournir le même \texttt{Identifiant de l'entreprise} que vos collègues dans la case semblable à la figure ci-dessous. + +\begin{figure}[H] +\centering +\begin{minipage}{0.7\textwidth} +\textbf{\Large Identifiant de l'entreprise \huge \Square \Square \Square \Square \Square \Square \Square \Square } +\end{minipage} +\end{figure} + +Veuillez cocher dans les questions à choix multiples la ou les cases qui vous concernent. En cas de réponses douteuses sur une fiche, celle-ci sera ignorée, de même en cas de mention d'informations spécifiques à une personne ou entreprise dans le formulaire. + +\section*{Où~?} + +Ce sondage sera majoritairement effectué en France, mais potentiellement aussi aux Pays-Bas et/ou aux États-Unis. 
+ +\end{document} diff --git a/whitepaper/fr_survey_privates.tex b/whitepaper/fr_survey_privates.tex new file mode 100644 index 0000000..30fc91d --- /dev/null +++ b/whitepaper/fr_survey_privates.tex @@ -0,0 +1,190 @@ +\documentclass[a4paper,10pt,BCOR10mm,oneside,headsepline]{scrartcl} +\usepackage[french]{babel} +\usepackage[utf8]{inputenc} +\usepackage{wasysym}% provides \ocircle and \Box +\usepackage{enumitem}% easy control of topsep and leftmargin for lists +\usepackage{color}% used for background color +\usepackage{forloop}% used for \Qrating and \Qlines +\usepackage{ifthen}% used for \Qitem and \QItem +\usepackage{typearea} +\areaset{17cm}{26cm} +\setlength{\topmargin}{-1cm} +\usepackage{scrpage2} +\pagestyle{scrheadings} +\ihead{Enquête sur la sécurité des données (fiche particulier)} +\ohead{\pagemark} +\chead{} +\cfoot{} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Beginning of questionnaire command definitions %% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% 2010, 2012 by Sven Hartenstein +%% mail@svenhartenstein.de +%% http~://www.svenhartenstein.de +%% +%% Please be warned that this is NOT a full-featured framework for +%% creating (all sorts of) questionnaires. Rather, it is a small +%% collection of LaTeX commands that I found useful when creating a +%% questionnaire. Feel free to copy and adjust any parts you like. +%% Most probably, you will want to change the commands, so that they +%% fit your taste. +%% +%% Also note that I am not a LaTeX expert! Things can very likely be +%% done much more elegant than I was able to. If you have suggestions +%% about what can be improved please send me an email. I intend to +%% add good tipps to my website and to name contributers of course. +%% +%% 10/2012~: Thanks to karathan for the suggestion to put \noindent +%% before \rule! + +%% \Qq = Questionaire question. Oh, this is just too simple. 
It helps +%% making it easy to globally change the appearance of questions. +\newcommand{\Qq}[1]{\textbf{#1}} + +%% \QO = Circle or box to be ticked. Used both by direct call and by +%% \Qrating and \Qlist. +\newcommand{\QO}{$\Box$}% or~: $\ocircle$ + +%% \Qrating = Automatically create a rating scale with NUM steps, like +%% this~: 0--0--0--0--0. +\newcounter{qr} +\newcommand{\Qrating}[1]{\QO\forloop{qr}{1}{\value{qr} < #1}{---\QO}} + +%% \Qline = Again, this is very simple. It helps setting the line +%% thickness globally. Used both by direct call and by \Qlines. +\newcommand{\Qline}[1]{\noindent\rule{#1}{0.6pt}} + +%% \Qlines = Insert NUM lines with width=\linewith. You can change the +%% \vskip value to adjust the spacing. +\newcounter{ql} +\newcommand{\Qlines}[1]{\forloop{ql}{0}{\value{ql}<#1}{\vskip0em\Qline{\linewidth}}} + +%% \Qlist = This is an environment very similar to itemize but with +%% \QO in front of each list item. Useful for classical multiple +%% choice. Change leftmargin and topsep accourding to your taste. +\newenvironment{Qlist}{% +\renewcommand{\labelitemi}{\QO} +\begin{itemize}[leftmargin=1.5em,topsep=-.5em,label={\Square}] +}{% +\end{itemize} +} + +%% \Qtab = A "tabulator simulation". The first argument is the +%% distance from the left margin. The second argument is content which +%% is indented within the current row. +\newlength{\qt} +\newcommand{\Qtab}[2]{ +\setlength{\qt}{\linewidth} +\addtolength{\qt}{-#1} +\hfill\parbox[t]{\qt}{\raggedright #2} +} + +%% \Qitem = Item with automatic numbering. The first optional argument +%% can be used to create sub-items like 2a, 2b, 2c, ... The item +%% number is increased if the first argument is omitted or equals 'a'. +%% You will have to adjust this if you prefer a different numbering +%% scheme. Adjust topsep and leftmargin as needed. 
+\newcounter{itemnummer} +\newcommand{\Qitem}[2][]{% #1 optional, #2 notwendig +\ifthenelse{\equal{#1}{}}{\stepcounter{itemnummer}}{} +\ifthenelse{\equal{#1}{a}}{\stepcounter{itemnummer}}{} +\begin{enumerate}[topsep=2pt,leftmargin=2.8em] +\item[\textbf{\arabic{itemnummer}#1.}] #2 +\end{enumerate} +} + +%% \QItem = Like \Qitem but with alternating background color. This +%% might be error prone as I hard-coded some lengths (-5.25pt and +%% -3pt)! I do not yet understand why I need them. +\definecolor{bgodd}{rgb}{0.8,0.8,0.8} +\definecolor{bgeven}{rgb}{0.9,0.9,0.9} +\newcounter{itemoddeven} +\newlength{\gb} +\newcommand{\QItem}[2][]{% #1 optional, #2 notwendig +\setlength{\gb}{\linewidth} +\addtolength{\gb}{-5.25pt} +\ifthenelse{\equal{\value{itemoddeven}}{0}}{% +\noindent\colorbox{bgeven}{\hskip-3pt\begin{minipage}{\gb}\Qitem[#1]{#2}\end{minipage}}% +\stepcounter{itemoddeven}% +}{% +\noindent\colorbox{bgodd}{\hskip-3pt\begin{minipage}{\gb}\Qitem[#1]{#2}\end{minipage}}% +\setcounter{itemoddeven}{0}% +} +} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% End of questionnaire command definitions %% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\begin{document} + +\begin{center} +\textbf{\huge Enquête sur la sécurité des données} +\end{center}\vskip1em + +\Qitem{ \Qq{Êtes-vous inquiété.e par la collecte d'informations personnelles ou sensibles par les entreprises~?} + + +\Qtab{3cm}{Peu inquiété.e \Qrating{5} +Très inquiété.e}} + + +\Qitem{ \Qq{À quel point vous sentez-vous impliqué.e dans la protection de vos données~?} + +\Qtab{3cm}{Peu impliqué.e \Qrating{5} +Très impliqué.e}} + + +\Qitem{ \Qq{Utilisez-vous des outils numériques de protection de la vie privée~?} +\begin{Qlist} +\item Disques chiffrés +\item Systèmes d'exploitation open-source +\item VPN (réseau privé virtuel) +\item Cloud/Disque nuagique chiffré en ligne +\item Messagerie chiffrée (y compris PGP) +\item Autres~: \Qline{4cm} +\end{Qlist} +} + +\Qitem{ \Qq{À quelles 
fins accepteriez-vous que vos données personnelles ou sensibles soient transmises à une tierce partie~?} +\begin{Qlist} +\item À des fins commerciales, dans le cadre~: +\begin{Qlist} + \item de publicité ciblée + \item de téléprospection + \item d'études de marché (données anonymisées) +\end{Qlist} +\item À des fins juridiques, afin d'incriminer sur~: +\begin{Qlist} + \item des délits + \item des crimes + \item des affaires de terrorisme +\end{Qlist} +\item À des fins scientifiques, dans le cadre de recherche en~: +\begin{Qlist} + \item médecine + \item sociologie + \item économie +\end{Qlist} +\item À des fins politiques, afin de déterminer~: +\begin{Qlist} + \item des statistiques anonymes sur votre alignement politique + \item vos demandes et intérêts +\end{Qlist} +\end{Qlist} +} + +\Qitem{ \Qq{Que pensez-vous de l'état de la protection de vos données contre le vol~?} + +\Qtab{3cm}{Insuffisante \Qrating{5} +Plus que suffisante}} + +\Qitem{ \Qq{Que pensez-vous de l'état de la protection de vos données contre la perte ou la destruction accidentelle~?} + +\Qtab{3cm}{Insuffisante \Qrating{5} +Plus que suffisante}} + + +\end{document} diff --git a/whitepaper/main.tex b/whitepaper/main.tex index 9c99cbe..82f124a 100755 --- a/whitepaper/main.tex +++ b/whitepaper/main.tex @@ -34,6 +34,7 @@ %---------------------------------------------------------------------------------------- \documentclass[12pt,fleqn]{book} % Default font size and left-justified equations +\usepackage[T1]{fontenc} \usepackage[automake]{glossaries} \usepackage{amssymb} \usepackage{wasysym} @@ -44,12 +45,14 @@ \usepackage{siunitx} \usepackage{placeins} \usepackage{pgf-umlsd} +\usepackage{pgf-umlcd} \usepackage{adjustbox} \usepackage{multirow} \usepackage{enumitem} \usepackage{hhline} \usepackage{pgfplots} \usepackage{float} +\usepackage{fp} \let\Oldsection\section \renewcommand{\section}{\FloatBarrier\Oldsection} @@ -61,7 +64,7 @@ \renewcommand{\subsubsection}{\FloatBarrier\Oldsubsubsection} 
\author{Ludovic `Archivist' Lagouardette} -\title{Advanced Storage system} +\title{SStorage} \date{2019} \makeglossaries @@ -82,7 +85,7 @@ \thispagestyle{empty} % Suppress headers and footers on the title page \begin{tikzpicture}[remember picture,overlay] \node[inner sep=0pt] (background) at (current page.center) {\includegraphics[width=\paperwidth]{background.pdf}}; -\draw (current page.center) node [fill=ocre!30!white,fill opacity=0.6,text opacity=1,inner sep=1cm]{\Huge\centering\bfseries\sffamily\parbox[c][][t]{\paperwidth}{\centering Advanced Storage System\\[15pt] % Book title +\draw (current page.center) node [fill=ocre!30!white,fill opacity=0.6,text opacity=1,inner sep=1cm]{\Huge\centering\bfseries\sffamily\parbox[c][][t]{\paperwidth}{\centering SStorage\\[15pt] % Book title {\Large Whitepaper}\\[20pt] % Subtitle {\small Ludovic `Archivist' Lagouardette}}}; % Author name \end{tikzpicture} @@ -106,7 +109,7 @@ \noindent \textsc{https://archivist.nekoit.xyz}\\ % URL -\noindent This document is under intellectual property of NekoIT, reproduction and redistribution is allowed in digital format only without any modifications.\\ % License information, replace this with your own license (if any) +\noindent This document is under Creative-Common License BY-NC-SA 3.0.\\ % License information, replace this with your own license (if any) \noindent \textit{First printing, 2019} % Printing/edition date @@ -131,7 +134,7 @@ \pagestyle{fancy} % Enable headers and footers again \mainmatter -\part{Human friendly specification} +\part{Project presentation} \chapter{The state of cloud storage} @@ -145,31 +148,33 @@ \section{Competition} -Multiple service providers, from Google to Amazon as well as some other smaller actors have tackled the task of storing data for a variety of use-cases, for a variety of pricing tables and options. 
+Multiple service providers, Google, Amazon, Ovh, Apple, as well as some other smaller actors have tackled the task of storing data for a variety of use-cases, pricing tables and options. -In this section we will take a look at the current state of competition. We do not aim at taking a look at the whole market but at a number of important or interesting actors. +In this section you will find a report on the current state of competition. We do not aim at taking a look at the whole market but at a number of important or interesting actors. \subsection{Google Drive and Google Cloud Platform} Google is famous for its economical impact on the software development industry. This is also true of its Google Drive and Google Cloud Platform products. -It is to the point where you can label its product the cheapest way to backup multiple terabytes\autocite{ltt_backup}. You can also quote their other products Google Cloud Storage as a cheap yet very efficient tool tool to store data for applications and websites. +It could be labeled the cheapest way to backup multiple terabytes\autocite{ltt_backup}. You can also quote their other products Google Cloud Storage as a cheap yet very efficient tool to store data for applications and websites. They however do not offer any kind of protection on their services, the data stored on their side is not encrypted and they may use it for advertisement purposes for example. They however are not misleading on their offer even if their product is not privacy centered at all. \subsection{Amazon Cloud Drive and their variety of services} -We will not expand on how varied and efficient Amazon cloud storage is. They basically provide about all types of storage for any type of data structure from the typical file system to the most advanced layouts of databases. +It is possible to expand at length on how varied and efficient Amazon cloud storage is. 
They provide nearly all types of storage for any type of data structure from the typical file system to the most advanced layouts of databases. Their prices are slightly higher than those of Google. Like Google however, they only propose encryption of the data while in transit. +Most of Amazon's cloud services are targeted towards professional users. + \subsection{Operation Tulip (NextCloud over Ceph)} -An open-source initiative to propose a simple cloud suite with some file storage and some tools like a calendar and an online LibreOffice implementation. +An open-source initiative to propose a simple cloud suite with file storage and tools like a calendar and an online LibreOffice implementation. -This service is in open beta (can be tested by anyone) and uses some of the most used open source software to hold encrypted data and deal with storage redundancy: Ceph and NextCloud. +This service is in open beta\footnote{can be tested by anyone} and uses open source software to hold encrypted data with storage redundancy: Ceph and NextCloud. -It is however not to be used for actively using the data but more as a backup solution and cold storage. +It is not meant for actively working with the data but rather as a backup solution and cold storage. \subsection{Backblaze} @@ -179,62 +184,64 @@ They also offer a storage for live data in one of their offers. Their prizes are \subsection{Dropbox} -A well used actor in backup cloud storage system. They provide multiple tiers of pricing, from a free offer to multiple paid storage offers. All of them are meant for dead storage for roaming and sharing files. +A well-known actor in backup cloud storage systems. They provide multiple tiers of pricing, from a free offer to multiple paid storage offers. All of them are meant for dead storage, for roaming and for sharing files. \subsection{Tarsnap} A small actor based in Canada. they offer cold storage services, encrypted and open-source on client side. 
They pricing is on a \textit{"as you go"} basis, pricing network traffic as well as storage used. +The performance of the service was not tested, but judging from its software architecture and design, it may be relatively slow when used as an active storage as it is delta based, and it is unlikely to be usable with a high degree of concurrency. + \section{Technology} -Multiple technology and they open-source counterparts can be used to handle online data storage. In this section we will explore those possibilities by comparing both commercial and free solutions where possible. +Multiple technologies and their open-source counterparts can be used to handle online data storage. In this section we will explore those possibilities by comparing both commercial and free solutions where possible. \subsection{Google Spanner and CockroachDB} -Google Spanner and CockroachDB are two database software for geo-replicated databases. They both use a clock based mechanism for handling transactions, making them faster with better clock synchronization. CockroachDB have however lower requirements on clock accuracy that Google Spanner does\autocite{cockroach_atomic}. +Google Spanner and CockroachDB are two database software for geo-replicated databases. They use a clock based mechanism for handling transactions, making them as fast as their clock synchronization. CockroachDB however has lower requirements on clock accuracy than Google Spanner does\autocite{cockroach_atomic}. -Google Spanner is as its name implies a proprietary product from Google. CockroachDB is an open-source project from CockroachLabs made to implement as much of Google Spanner features as possible. It also intends to try to be compatible with PostgreSQL to ease application porting\autocite{cockroach_postgres}. +Google Spanner is a proprietary product from Google. CockroachDB is an open-source project from CockroachLabs made to implement as much of Google Spanner features as possible. 
It also intends to try to be compatible with PostgreSQL to ease application porting\autocite{cockroach_postgres}. -Both of those tools can be used to implement either a block based storage or an object storage to use to implement a geo-replicated filesystem. +Both of those tools can be used to implement either a block based storage or an object storage usable to implement a geo-replicated filesystem. -Using CockroachDB as a back-end to implement the system was envisioned, but latency tests made us choose to rather use a custom implemented data server. We however use a very similar way of resolving database conflict (see the sequence diagrams \ref{fig:confirmation_proto} and \ref{fig:2user_confirmation_proto} at page \pageref{fig:confirmation_proto}). +Using CockroachDB as a back-end to implement SStorage was envisioned, but latency tests made us choose to use a custom implemented data server. SStorage uses a similar way of resolving database conflict (see the sequence diagrams \ref{fig:confirmation_proto} and \ref{fig:2user_confirmation_proto} at page \pageref{fig:confirmation_proto} and \pageref{fig:2user_confirmation_proto}). \subsection{Ceph, RADOS and CRUSH} Ceph is a distributed data storage system. It uses the RADOS (Reliable Autonomic Distributed Object Store), a storage system designed around the idea of placing data in predictable place following a mathematical equation. This is named CRUSH, for Controlled Replication Under Scalable Hashing. -Placement of data in our system follows some concepts from Ceph, RADOS and CRUSH. +Placement of data in SStorage system follows some concepts from Ceph, RADOS and CRUSH. -This system is currently in development by the CERN. They use it as a back-end for many types of storage, from filesystems to block devices and storage for scientific data before analysis. +Ceph is currently in development by the CERN. 
Ceph is used as a back-end for many types of storage, from filesystems to block devices and storage for scientific data. -Like mentioned in the listing of other actors, the Operation Tulip project are using it to store the files they manipulate. Other not mentioned actors like Ovh use it too for example for storing Virtual Machines and as storage for cloud computing. +As mentioned in the listing of other actors, the Operation Tulip project are using it to store the files they manipulate. Other not mentioned actors like Ovh use it for storing Virtual Machines and as storage for cloud computing for example. \subsection{NextCloud} -NextCloud is an open-source system written in PHP to be used as a front-end for cloud hosting. It supports WebDAV and other protocols as well as providing multiple productivity features from text edition to spreadsheets and calendars. +NextCloud is an open-source system written in PHP to be used as a front-end for cloud hosting. It supports WebDAV and other protocols as well as providing multiple productivity features like text edition, spreadsheets and calendars. -It however is slow due to having been designed in a programming language unsuitable for performance applications. +It is slow due to having been designed in a programming language unsuitable for performance applications. It supports end to end encryption. \section{Hardware and hosting} -Naming it cloud storage doesn't mean the data is in some phantasmagorical place. As such we will study here the possibilities for one to deploy his own cluster of servers to host his own data. +Naming it cloud storage doesn't mean the data is in some phantasmagorical place. As such we will study here the possibilities for one to deploy their own cluster of servers to host their own data. -For that we will compare pricing of the hardware required to deploy our solution online for a data size around 50\si{\tera{}B (\pm 5\percent)} of storage. 
+For that will be provided a comparison of pricing of the hardware required to deploy their own solution online for a data size around 50\si{\tera{}B (\pm 5\percent)} of storage. \begin{table}[h] \centering \begin{tabular}{|l|l|l|l|} \hline -System & Upfront price & Price per GB per year & Amort. (y) \\\hhline{|=|=|=|=|} +System & Upfront price & Price per GB per year & Ac. D.\footnote{Accounting depreciation in years} \\\hhline{|=|=|=|=|} SuperMicro SC825TQ-560LP $\times 3$ & USD15100 & USD0.21 & 5 \\ and SuperMicro 5018D-MF (new) & +USD900/m & & \\\hline HP~ProLiant~DL180-G5 $\times 4$ & USD3350 & USD0.21 & 3 \\ (refurbished) & +USD900/m & & \\\hline -Ovh rented servers $\times 4$ & USD890/m & USD0.21 & 0 \\\hline +Ovh rented servers $\times 4$ & USD780/m & USD0.20 & 0 \\\hline \end{tabular} - \textit{It is to be noted that the performance is also decreasing with each category down.} + \textit{It is to be noted that the performance is also decreasing with each category down, as the upfront price is rising. Monthly fees for non-rented items are the housing for the hardware.} \caption{Server pricing} \label{tab:server_pricing} \end{table} @@ -247,34 +254,38 @@ Brand new hardware is generally a real investment for an individual or a new com Of those families, named architectures, we will consider two: the \texttt{x86\_{}64}, also referred as \texttt{amd64}; and the most recent architecture from the \texttt{ARM} group, the \texttt{ARMv8} architecture and its variants. -Both of them share the minimal set of features for a type of storage named a \texttt{memory mapped hash table} to be implementable to a usable degree. +Both of them share the minimal set of features for a type of storage named a \texttt{memory mapped hash table} to be implementable. \subsubsection{\texttt{x86\_{}64} architecture} -This architecture is common to most modern computers, laptops, workstations and servers nowadays. It is therefore easy to make software for it and it is well documented. 
+This architecture is common to most modern computers, laptops, workstations and servers. It is therefore easy to make software for it as it is well documented. -It however have the huge drawback of being power hungry, having been extended for more and more performance, it tends to be consume lots of power and hence, to require proportional cooling. +It has the huge drawback of being power hungry: having been extended over time, it tends to consume more power and to require proportional cooling. -Taking for example a server from SuperMicro SC825TQ-560LP, we estimate a price of around 4'500USD per server for the data storage, requiring at least 3 of them, additional ones for ensuring safety in case one of them fails, as well as any other server for handling coordination of data storage. +Taking for example a SuperMicro SC825TQ-560LP server, we estimate a price of around 4'500USD per server, with three such servers required for the data storage, as well as any other server for handling coordination of data storage. -For that we advise a server of the likes of a SuperMicro 5018D-MF, for which we estimate a price of about 1'600USD if equipped with a proper network card for handling connections to the storage servers properly. +For coordination, a server such as the SuperMicro 5018D-MF is appropriate; we estimate its price at about 1'600USD when equipped with a proper network card for handling connections to the storage servers. \subsubsection{\texttt{ARMv8} architecture} -This architecture being relatively new, we will not adventure into pricing it, but we think that adapted servers for storage equipped with ThunderX2 CPUs from Cavium would do well as storage server and likewise equipped servers with a ThunderX2 adapted for computationally heavy loads would fit the use case as a coordination server. 
+This architecture being new, it is hard to adventure into pricing it, but adapted servers for storage equipped with ThunderX2 CPUs from Cavium would do well as storage server and likewise equipped servers with a ThunderX2 adapted for computationally heavy loads would fit the use case as a coordination server. -This setup is however untested and it would not be possible at the time of redaction of these lines to test it for us. These servers would also not run some operating systems critical for safety of network infrastructure like OpenBSD. +This setup is however untested and it would not be possible at the time of redaction of these lines to test it for us. These servers also may not run some operating systems critical for safety of network infrastructure like OpenBSD as of the writing of these lines\footnote{last verification on January 14th 2020}. \subsection{Refurbished hardware} -As for refurbished hardware, we looked into the products of professionals in sales of refurbished hardware. We advise for those with small budget a constellation of HP~ProLiant~DL180-G5, with a price per server of about 950USD (disks being new), and any server with a decent enough set of network connectivity to not be a bottleneck. +We looked into the products of professionals in sales of refurbished hardware. It is advised for those with small budget a constellation of HP~ProLiant~DL180-G5, with a price per server of about 950USD (disks being new), and any server with a decent enough set of network connectivity to not be a bottleneck in coordination. \subsection{Rented dedicated servers} -As for dedicated servers, we got our eyes on Ovh, which would propose to rent servers for 230USD per month per server with an added 80USD per month for the coordination server. This doesn't encompasses any backup server additionally needed to guarantee fast replication if one of the three servers fails, but it takes into account all hosting costs. 
+As for dedicated servers, Ovh proposes to rent servers for 230USD per month per server with an added 80USD per month for the coordination server. This does not encompass any backup server additionally needed to guarantee fast replication if one of the three servers fails, but it takes into account all hosting costs. \section{The users} +We conducted a survey on Telegram and Discord about privacy in cloud storage and cloud storage usage. While, with hindsight, some questions led to unsurprising responses given the population surveyed (Telegram being quite security oriented), the results may be interesting in certain regards. + +The survey was conducted on 23 people, most of them from computer science and programming related groups on Telegram and politics and economics related groups on Discord. + \begin{figure}[h] \centering \begin{tikzpicture} @@ -283,7 +294,7 @@ As for dedicated servers, we got our eyes on Ovh, which would propose to rent se enlargelimits=0.15, legend style={at={(0.5,-0.15)}, anchor=north,legend columns=-1}, - ylabel={\#Percentage on interrogated people}, + ylabel={Percentage on interrogated people}, symbolic x coords={Not so concerned,Concerned,Very concerned}, xtick=data, nodes near coords, @@ -296,6 +307,15 @@ As for dedicated servers, we got our eyes on Ovh, which would propose to rent se \label{fig:privacy_concerns} \end{figure} +\FPeval{telegramvpn}{round(800/15, 2)} +\FPeval{notelegramvpn}{round(400/8, 2)} + +First of all, most of the people interrogated were Telegram users. Among Telegram users, \telegramvpn\% use a VPN, and \notelegramvpn\% of the surveyed people who do not use Telegram use a VPN. + +It also shows that the privacy concerned population has significantly more trust in open-source community approved cryptography than in government approved cryptography. + +Lots of people also use encrypted hard-drives but most are cold to the use of cloud storage. 
From a few interviews most of them harbor distrust of the majors cloud storage service providers. Our goal is to provide a solution that these people can trust to store their hot and cold data. + \begin{figure} \centering \begin{tikzpicture} @@ -304,7 +324,7 @@ As for dedicated servers, we got our eyes on Ovh, which would propose to rent se enlargelimits=0.15, legend style={at={(0.5,-0.15)}, anchor=north,legend columns=-1}, - ylabel={\#Percentage on interrogated people}, + ylabel={Percentage on interrogated people}, symbolic x coords={A,B,C,D}, xtick=data, nodes near coords, @@ -335,7 +355,7 @@ As for dedicated servers, we got our eyes on Ovh, which would propose to rent se xmin=A, xmax=D, legend style={at={(0.5,0.0)}, anchor=north,legend columns=-1}, - ylabel={\#Percentage on interrogated people}, + ylabel={Percentage on interrogated people}, symbolic x coords={A,B,C,D}, xtick=data, nodes near coords, @@ -364,7 +384,7 @@ As for dedicated servers, we got our eyes on Ovh, which would propose to rent se enlargelimits=0.15, legend style={at={(0.5,-0.15)}, anchor=north,legend columns=-1}, - ylabel={\#Percentage on interrogated people}, + ylabel={Percentage on interrogated people}, symbolic x coords={Yes (Encrypted),Yes,No}, xtick=data, nodes near coords, @@ -385,7 +405,7 @@ As for dedicated servers, we got our eyes on Ovh, which would propose to rent se \end{center} \vspace{5em} -\vspace{-3em}\hspace{-1.4em}Privacy is a notion of everyday. Everyday people use object made to guarantee some, from curtains to acoustic insulation, from locked doors to security cabinets, privacy is something that concerns doctors, lawyers, engineers, inventors, chefs, military staff\ldots{} +\vspace{-3em}\hspace{-1.4em}Privacy is a daily concern. 
Everyday people use objects made to guarantee some levels of it, from curtains to acoustic insulation, from locked doors to security cabinets, privacy is something that concerns doctors, lawyers, engineers, inventors, chefs, military staff\ldots{} But also each and everyone to some degree. Sometime privacy is an indirect concern: an archival company should not take a peek at your doctor's or lawyer's files and cases. Sometimes such an indirect concern reaches to be a concern of someone through friends, business partners, lovers\ldots{} You would not want someone to learn your friend's secrets through you. @@ -401,7 +421,7 @@ Those conditions may also tell a person to who the data sent on their service be This is in my opinion problematic from a moral standpoint when it is not explicit that the service acquires your information with your consent but on terms you may not entirely agree with for the simple reason that those terms are buried into a huge quantity of legal information. -The projection of that issue is when the very same terms and conditions allow for the company to sell or provide the information, generally non-anonymized, to a third party without additional demand for consent. This id extremely common in companies that offer services "for free" or for very low prices compared to the cost of the actual service. +The projection of that issue is when the very same terms and conditions allow for the company to sell or provide the information, generally non-anonymized, to a third party without additional demand for consent. This is extremely common in companies that offer services "for free" or for very low prices compared to the cost of the actual service. 
\section{On advertisement} @@ -435,7 +455,7 @@ Nowadays, most of the web communications are encrypted and at least partially au This doesn't mean that any data sent to those services is encrypted once stored on the provider: most providers do not store data encrypted as it brings computing costs up by a very significant margin if they need to access that data. -Similarly, it is considered bad practice to store passwords in a readable format, to protect them, specific cryptographic techniques exist so that it is possible to verify a password from a form of said password transformed with a one way transformation named a cryptographic hash function. That said, some companies still store password in readable form in their databases. +Similarly, it is considered bad practice to store passwords in a readable format, to protect them, specific cryptographic techniques exist so that it is possible to verify a password from a form of said password transformed with a one way transformation named a cryptographic hash function. That being said, some companies still store password in readable form in their databases. This means that, access should be compromised on the database of a company or within any vulnerable part of their computer system, data could be entirely compromised. This has happened a lot in recent years, and is bound to be a phenomena that multiplies should companies not start caring for their customer's privacy. @@ -457,13 +477,13 @@ We want to provide an online storage with the following properties: \vspace{0.6em}First of all, it must be georeplicated. It is not okay to lose service access due to the loss of one server farm on our own side. -\vspace{0.3em}Then, the data must be protected, we ourselves should be entirely unable to read it, we should also be unable to read the metadata. 
+\vspace{0.3em}Then, the data must be protected, we ourselves should be entirely unable to read it, we should also be unable to read the metadata that is not absolutely required to provide the service. -\vspace{0.3em}Also, any part of the data must be fast enough to access that it is hard to differentiate our service from access of a hard-drive given a good enough network connection, same goes for writing. +\vspace{0.3em}Also, any part of the data must be fast enough to access that it is hard to differentiate our service from access of an encrypted hard-drive given a good enough network connection, same goes for writing. \vspace{0.3em}Finally, it must be flexible and adaptable to multiple use-cases. -\vspace{1em}This leads us to the following idea: we are aiming to create a service that can store encrypted data, it must be able to store it in a layout similar to a disk, this way it possesses the same capabilities as a hard drive disk. The key to decipher the data is stored online but encrypted with the user password. Authentication requires the user to be able to read the password to get a token. It is possible to leave said token disabled and enable it only with a second authentication factor. +\vspace{1em}This leads us to the following idea: we are aiming to create a service that can store encrypted data, it must be able to store it in a layout similar to a disk, this way it possesses the same capabilities as a hard drive disk. The key to decipher the data is stored online but encrypted using a key derived from the user password. Authentication requires the user to be able to read the password to get a token. It is possible to leave said token disabled and enable it only with a second authentication factor. We want our system to be protected from the point of view of our customers, as such, we aim at it having a code-base readable and short enough to be explored completely in 3 days by a developer with access to enough documentation. 
@@ -483,6 +503,12 @@ Furthermore, the labels of each block of data can be used as a piece of the encr As for the openness principle, it is just as stated, we will disclose any demand that are made as soon as they are made as well as our responses to them. We will disclose any security issue or concern we receive. We will provide tools for anyone to be informed of these information through multiple channels. +\section{Consequences of those principles in the design} + +The first consequence of that design is that it is impossible for us to decipher data sent to us. We however had a trade-off to make to maintain a healthy performance for reading or writing sequential data of considerable size: if a big file is written at once and the client sends the data in order, the big file will be stored approximately sequentially in our database. This can however be blurred by not writing the big file in order. + +Another consequence is that the only permissions that can be handled on a block (a unit of a file system) are making it non-readable, read-only, or writable, and that this unit of file-system is not suitable for implementing system-level access control (for example, Microsoft Windows permissions, Linux system permissions\ldots{}), meaning those are to be enforced by the client computer. Whatever the permissions, this means that having any file-system write permission allows a user to perform any operation on the file-system as a whole. + \section{Data life cycle} Here is a list of the data that may be collected by us in any interaction with our software. This data is sorted by interaction. 
@@ -571,6 +597,8 @@ Companies with open business practices have in my opinion the greatest chance of \item Can be version-controlled \item Can store a pointer to a root \item Do not permit retrieval of key alone + \item Can be encrypted with a password key generator + \item Can be encrypted with a one-time pad (USB drive base OTP) \end{itemize} \section{System root header} @@ -578,7 +606,8 @@ Companies with open business practices have in my opinion the greatest chance of \begin{itemize}[label={\Square}] \item Is extendable \item Can store a pointer to a device and its type - \item Can store an attribute pointer to for a device + \item Can store an attribute pointer for a device + \item The root is on a non-shared block device and device UUID. \end{itemize} \section{Argus block device} @@ -616,43 +645,24 @@ Let $n$ be the number of files in the file system. Let $m$ be the size of a give \item Permissions are handled by the client only \item User 0 (root) can alter any permission \end{itemize} - \item NT permissions may not be implemented + \item NT permissions MAY NOT be implemented \begin{itemize}[label={\Square}] - \item NT clients may not perform ACL checks + \item NT clients MAY NOT perform ACL checks \item Any NT user can alter file data \item NT users can not alter file permissions \item Items created by NT users are created with the permission of their parent item \end{itemize} -\end{itemize} - -\chapter{Front-end capabilities} -\section{Web interface} -\begin{itemize}[label={}] - \item Account management + \item Verifies the long term lock of devices before use every 30 seconds at least + \item Long term lock a device \begin{itemize}[label={\Square}] - \item Can create an account - \item Can share a 2FA secret - \item Can delete an account - \item Can confirm a payment - \item Can inform of payment lateness - \item Can self-wipe account + \item The lock is marked immediately + \item The lock is active after 61 seconds of waiting have passed + \item[] This 
is made for critical maintenance and version upgrade scripts to be able to run properly. \end{itemize} - - \item Authentication - \begin{itemize}[label={\Square}] - \item Can generate a token from a connection request - \item Can confirm a token with a 2FA subtoken - \item Can invalidate a token - \item Can generate a shared secret to handle UDP connections - \item Can regenerate a password block from an older password block - \end{itemize} - - \item Provides a payment link - \item Provides a link to this document - \item Provides all personal information stored on our side for the logged in user - \item Provide a link to our company balance \end{itemize} + + \section{Heavy-clients} \begin{itemize}[label={\Square\Square}] @@ -791,7 +801,7 @@ Those cosmetic items must have a textual fallback accessible by hovering their e On \autoref{fig:mockup-desktopmain}, the circle and pie will actually be replaced by a render graphic whose inner part will spin if and only if time synchronization is happening normally, the outer part will grow brighter if elements of progress are made on the selected file system, and the inner part with transition between green and red through yellow showing data congestion. -Other render elements could be added to the application for aesthetic purposes, at the sole condition they provide a visually appealing way to understand the status of the system. +Other render elements could be added to the application for aesthetic purposes, at the sole condition they provide a visually appealing way to understand the status of the system or provide meaningful information about the system (updates, maintenances\ldots{}). 
\begin{figure}[H] \centering @@ -823,22 +833,37 @@ Behaviour of the login part of the page is expected to be identical to the behav \label{fig:mockup-newfs} \end{figure} +\chapter{Front-end capabilities} +\section{Web interface} +\begin{itemize}[label={\Square}] + \item[] Account management + \begin{itemize}[label={\Square}] + \item Can create an account + \item Can share a 2FA secret + \item Can delete an account + \item Can confirm a payment + \item Can inform of payment lateness + \item Can self-wipe account + \end{itemize} + + \item[] Authentication + \begin{itemize}[label={\Square}] + \item Can generate a token from a connection request + \item Can confirm a token with a 2FA subtoken + \item Can invalidate a token + \item Can generate a shared secret to handle UDP connections + \item Can regenerate a password block from an older password block + \end{itemize} + + \item Provides a payment link + \item Provides a link to this document + \item Provides all personal information stored on our side for the logged in user + \item Provide a link to our company balance +\end{itemize} + \section{Data interface} \begin{itemize}[label={\Square}] - \item[] Supports actions - \begin{itemize}[label={\Square}] - \item Read - \item Write - \item Update - \item Allocate - \item Allocate and push - \item Read and push - \item Pop and write - \item Delete - \item "Delete" and push - \item Assert timestamp - \item Commit - \end{itemize} + \item Supports data interaction actions \item Can execute a chain of actions \item Can confirm a chain of actions \item Can clear unconfirmed actions @@ -848,6 +873,7 @@ Behaviour of the login part of the page is expected to be identical to the behav \begin{itemize}[label={\Square}] \item Can lock and unlock a named mutex \item Can compare and swap a semaphore + \item Can provide a list of the other gateways \end{itemize} \item[] Administration \begin{itemize}[label={\Square}] @@ -856,46 +882,66 @@ Behaviour of the login part of the page is 
expected to be identical to the behav \item Can list users \item Can list connections \item Can wipe account - \item Can register a backend + \item Can register a spare data server. MUST be leader. + \item Can register a spare coordination server. MUST be leader. \end{itemize} \item[] Automation \begin{itemize}[label={\Square}] \item Can replace a dead server - \item Will duplicate if spares available, preferring $A \rightarrow B \rightarrow AB$ + \item Will duplicate if spares are available, following the rules below: + + If any category $N$ of $A$, $B$ or $AB$ has less than 1 server, make a copy of $N$; + \\ + else if there are as many of $A$, $B$ and $AB$, copy an $A$ server; + \\ + else if there are more $A$ than $B$, copy a $B$ server; + \\ + else if there are more $B$ than $AB$, copy an $AB$ server. \end{itemize} \end{itemize} +\section{Leadership interface} + +\section{Alternative interface} + \chapter{Back-end capabilities} \section{GoJ Database} \begin{itemize}[label={\Square}] \item[] Data \begin{itemize}[label={\Square}] + \item Can perform operations in bulk (see the operation code lists) \item Can read a record \item Can write a record \item Can confirm a record + \item Can validate a record \item Can remove a record \item Can test existence of a record \item Can allocate a record \item Can read a record metadata \end{itemize} - \item[] Stats + \item[] Statistics \begin{itemize}[label={\Square}] - \item Can provide stats given the stats format - \item Can alert in case of suspicious stats + \item Can provide statistics given the statistics format + \item Can alert in case of suspicious statistics \end{itemize} \item Can stream a list of all records, then stream all transformations that happened since the streaming started \end{itemize} \section{Performance requirements} \begin{itemize}[label={\Square}] - \item Less than 20\% below the server disk performance in terms of latency + \item Less than 20\% below the server disk performance in terms of latency of page reads 
\item Assumes available RAM to be at least 0.4\% of the disk capacity + \item Assumes a POSIX environment (Both Linux and OpenBSD should be supported as first-class targets) \end{itemize} \part{Technical specification} \chapter{Protocols} +\section{About \texttt{Andrew}} + +Most of the code to implement the protocols is expected to use a small code generator named Andrew that generates parsers from YAML files. + \section{\texttt{izaro-storage} queries} \begin{figure}[H] @@ -905,13 +951,14 @@ Behaviour of the login part of the page is expected to be identical to the behav \bitbox{14}{\texttt{unused}} \bitbox{1}{\texttt{\footnotesize 2S}} \bitbox{1}{\texttt{\footnotesize B}} - \bitbox{16}{\texttt{operation code (big endian)}} \\ + \bitbox{16}{\texttt{operation code}} \\ \wordbox{2}{\texttt{request identifier}} \\ \wordbox{1}{\textit{optional\ldots{}} \texttt{continuation}} \end{bytefield} \begin{itemize} \item[] \texttt{2S}: the operation is two-stepped (requires a confirmation to be applied) if set \item[] \texttt{B}: the operation is a bulk operation if set + \item[] operation code is a big endian integer (see page \pageref{fig:gp_class_diagram_regulated}) \end{itemize} \caption{Common request format (storage)} \label{fig:common_format_storage} @@ -922,8 +969,8 @@ Behaviour of the login part of the page is expected to be identical to the behav \begin{bytefield}[bitwidth=1.06em]{32} \bitheader{0-31} \\ - \wordbox{1}{$x$ (Big endian integer)} \\ - \wordbox{1}{$y$ (Big endian integer)} \\ + \wordbox{1}{$x$ (Big endian integer) (see page \pageref{fig:gp_class_diagram_regulated})} \\ + \wordbox{1}{$y$ (Big endian integer) (see page \pageref{fig:gp_class_diagram_regulated})} \\ \wordbox{4}{\texttt{UUID}} \end{bytefield} \begin{itemize} @@ -940,10 +987,10 @@ Behaviour of the login part of the page is expected to be identical to the behav \bitheader{0,31,63} \\ \wordbox{2}{\texttt{Common request format (storage)}} \\ \wordbox{3}{\texttt{Record identifier}} \\ - 
\wordbox{1}{\textit{optional\ldots{}} \texttt{Timestamp (Big endian integer)}} \\ + \wordbox{1}{\texttt{Timestamp (Big endian integer)} (see page \pageref{fig:gp_class_diagram_regulated})} \\ \end{bytefield} \begin{itemize} - \item[] Timestamp of 0 or absent means latest valid value + \item[] Timestamp of 0 means latest valid value \end{itemize} \caption{Request format for read (storage)} \label{fig:read_format_storage} @@ -961,10 +1008,10 @@ Behaviour of the login part of the page is expected to be identical to the behav \wordbox[lrt]{1}{\texttt{Database page}} \\ \skippedwords \\\wordbox[lrb]{1}{} \end{rightwordgroup} \\ - \wordbox{1}{\textit{optional\ldots{}} \texttt{Timestamp (Big endian integer)}} + \wordbox{1}{\texttt{Timestamp (Big endian integer)} (see page \pageref{fig:gp_class_diagram_regulated})} \end{bytefield} \begin{itemize} - \item[] Timestamp of 0, absent, of maxed as of \footnotesize{\texttt{std::numeric\_limits::max()}} means server time should be used + \item[] Timestamp of 0 or maxed as of \footnotesize{\texttt{std::numeric\_limits::max()}} means server time should be used \end{itemize} \caption{Request format for write (storage)} \label{fig:write_format_storage} @@ -977,10 +1024,10 @@ Behaviour of the login part of the page is expected to be identical to the behav \bitheader{0,31,63} \\ \wordbox{2}{\texttt{Common request format (storage)}} \\ \wordbox{3}{\texttt{Record identifier}} \\ - \wordbox{1}{\texttt{Timestamp (Big endian integer)}} \\ + \wordbox{1}{\texttt{Timestamp (Big endian integer)} (see page \pageref{fig:gp_class_diagram_regulated})} \\ \end{bytefield} \begin{itemize} - \item[] An invalid timestamp does nothing + \item[] An invalid timestamp triggers the behaviours described on page \pageref{time_sync_equation} \end{itemize} \caption{Request format for confirm (storage)} \label{fig:confirm_format_storage} @@ -993,10 +1040,10 @@ Behaviour of the login part of the page is expected to be identical to the behav \bitheader{0,31,63} \\ 
\wordbox{2}{\texttt{Common request format (storage)}} \\ \wordbox{3}{\texttt{Record identifier}} \\ - \wordbox{1}{\texttt{Timestamp (Big endian integer)}} \\ + \wordbox{1}{\texttt{Timestamp (Big endian integer)} (see page \pageref{fig:gp_class_diagram_regulated})} \\ \end{bytefield} \begin{itemize} - \item[] An invalid timestamp does nothing, a zeroed timestamp or absent timestamp means remove all, a specific value removes the target value. + \item[] An invalid timestamp implies the request is invalid, a zeroed timestamp means remove all, a specific value removes the target value. \end{itemize} \caption{Request format for remove (storage)} \label{fig:remove_format_storage} @@ -1013,7 +1060,7 @@ Behaviour of the login part of the page is expected to be identical to the behav \bitbox{32}{\texttt{Size}} \\ \begin{rightwordgroup}{Repeats \\ \texttt{Size} times} \wordbox{6}{\texttt{Record identifier}} \\ - \wordbox{2}{\texttt{Timestamp (Big endian integer)}} + \wordbox{2}{\texttt{Timestamp (Big endian integer)} (see page \pageref{fig:gp_class_diagram_regulated})} \end{rightwordgroup} \\ \end{bytefield} \begin{itemize} @@ -1028,10 +1075,10 @@ Behaviour of the login part of the page is expected to be identical to the behav \begin{bytefield}[bitwidth=1.06em]{32} \bitheader{0,31} \\ \wordbox{4}{\texttt{Common request format (storage)}} \\ - \bitbox{32}{IP address}\\ - \bitbox{16}{UDP port} + \bitbox{32}{IP address (see page \pageref{fig:gp_class_diagram_regulated})}\\ + \bitbox{16}{UDP port (see page \pageref{fig:gp_class_diagram_regulated})} \bitbox[lrt]{16}{} \\ - \bitbox[lr]{32}{Timestamp} \\ + \bitbox[lr]{32}{Timestamp (see page \pageref{fig:gp_class_diagram_regulated})} \\ \bitbox[lrb]{16}{} \bitbox[t]{16}{} \\ @@ -1048,9 +1095,9 @@ Behaviour of the login part of the page is expected to be identical to the behav \begin{bytefield}[bitwidth=1.06em]{32} \bitheader{0,31} \\ \wordbox{4}{\texttt{Common request format (storage)}} \\ - \bitbox{16}{\texttt{UDP port}} + 
\bitbox{16}{\texttt{UDP port} (see page \pageref{fig:gp_class_diagram_regulated})} \bitbox[lrt]{16}{} \\ - \bitbox[lr]{32}{\texttt{Timestamp}} \\ + \bitbox[lr]{32}{\texttt{Timestamp} (see page \pageref{fig:gp_class_diagram_regulated})} \\ \bitbox[lrb]{16}{} \bitbox{1}{\texttt{N}} \bitbox{15}{\texttt{Unused}} \\ @@ -1064,6 +1111,32 @@ Behaviour of the login part of the page is expected to be identical to the behav \label{fig:stream_reads_request} \end{figure} +\section{Operation codes} + +\begin{itemize} + \item $0$=Statistics + \item $1$=Read + \item $2$=Write + \item $3$=Confirm + \item $4$=Validate + \item $5$=Allocate + \item $6$=Meta read + \item $129$=Read $n$ + \item $130$=Write $n$ + \item $131$=Confirm $n$ + \item $132$=Validate $n$ + \item $133$=Allocate $n$ + \item $134$=Meta read $n$ + \item $257$=Unconfirmed read + \item $262$=Meta unconfirmed read + \item $383$=Unconfirmed read $n$ + \item $388$=Meta unconfirmed read $n$ + \item $513$=Uncommitted read + \item $518$=Meta uncommitted read + \item $641$=Uncommitted read $n$ + \item $646$=Meta uncommitted read $n$ +\end{itemize} + \section{\texttt{izaro-storage} replies} \begin{figure}[H] @@ -1072,15 +1145,17 @@ Behaviour of the login part of the page is expected to be identical to the behav \bitheader{0,31,63} \\ \wordbox{3}{\texttt{Record identifier}} \\ - \bitbox{64}{\texttt{Timestamp}} \\ - \bitbox{64}{\texttt{Offset}} \\ - \bitbox{30}{\texttt{Unused}} + \bitbox{64}{\texttt{Timestamp} (see page \pageref{fig:gp_class_diagram_regulated})} \\ + \bitbox{64}{\texttt{Offset} (see page \pageref{fig:gp_class_diagram_regulated})} \\ + \bitbox{29}{\texttt{Unused}} + \bitbox{1}{\texttt{\tiny V}} \bitbox{1}{\texttt{\tiny R}} \bitbox{1}{\texttt{\tiny C}} \end{bytefield} \begin{itemize} \item[] \texttt{R}: is set if the record is removed (not eligible for cleanup) \item[] \texttt{C}: is set if the record is confirmed + \item[] \texttt{V}: is set if the record is validated/committed \end{itemize} \caption{Record 
format (storage)} \label{fig:record_storage} @@ -1133,10 +1208,10 @@ Behaviour of the login part of the page is expected to be identical to the behav \bitbox{64}{\texttt{average duration of synchronizations}} \\ \end{bytefield} \begin{itemize} - \item[-] all values are big endian + \item[-] all values are big endian (see page \pageref{fig:gp_class_diagram_regulated}) \item[-] all times are in \si{\micro\second} \end{itemize} - \caption{Stats reply format (storage)} + \caption{Statistics reply format (storage)} \label{fig:stats_format_reply_storage} \end{figure} @@ -1167,40 +1242,40 @@ Behaviour of the login part of the page is expected to be identical to the behav \subsection{User payloads} -\begin{figure}[H] - \centering - \begin{bytefield}[bitwidth=0.48em]{64} - - \bitheader{0,31,63} \\ - \wordbox{3}{\texttt{record identifier}} \\ - \wordbox{1}{\texttt{timestamp}} \\ - \end{bytefield} - \begin{itemize} - \item[] the data fetched is the last before the timestamp - \item[] the timestamp is aligned on the coordinator's timestamping - \item[] if the timestamp is omitted the last confirmed page is read - \end{itemize} - \caption{Read request} - \label{fig:read_request} -\end{figure} - -\begin{figure}[H] - \centering - \begin{bytefield}[bitwidth=0.48em]{64} - - \bitheader{0,31,63} \\ - \begin{rightwordgroup}{$32Kio$} - \bitbox[lrt]{64}{} \\ - \wordbox[lr]{1}{\vspace{0.96em}\texttt{file page}} \\ - \skippedwords \\\wordbox[lrb]{1}{} - \end{rightwordgroup} \\ - \end{bytefield} - \begin{itemize} - \item[] the data will be stored on the time of the server - \end{itemize} - \caption{Allocate$+$Write request} - \label{fig:allocate_write_request} -\end{figure} +%\begin{figure}[H] +% \centering +% \begin{bytefield}[bitwidth=0.48em]{64} +% +% \bitheader{0,31,63} \\ +% \wordbox{3}{\texttt{record identifier}} \\ +% \wordbox{1}{\texttt{timestamp}} \\ +% \end{bytefield} +% \begin{itemize} +% \item[] the data fetched is the last before the timestamp +% \item[] the timestamp is 
aligned on the coordinator's timestamping +% \item[] if the timestamp is omitted the last confirmed page is read +% \end{itemize} +% \caption{Read request} +% \label{fig:read_request} +%\end{figure} +% +%\begin{figure}[H] +% \centering +% \begin{bytefield}[bitwidth=0.48em]{64} +% +% \bitheader{0,31,63} \\ +% \begin{rightwordgroup}{$32Kio$} +% \bitbox[lrt]{64}{} \\ +% \wordbox[lr]{1}{\vspace{0.96em}\texttt{file page}} \\ +% \skippedwords \\\wordbox[lrb]{1}{} +% \end{rightwordgroup} \\ +% \end{bytefield} +% \begin{itemize} +% \item[] the data will be stored on the time of the server +% \end{itemize} +% \caption{Allocate$+$Write request} +% \label{fig:allocate_write_request} +%\end{figure} \subsection{Root user payloads} @@ -1225,6 +1300,7 @@ Behaviour of the login part of the page is expected to be identical to the behav \section{\texttt{izaro-coordinate} timing protocol and consensus} +Complementary information regarding constraints can be found in the technical specification, page \pageref{time_sync_chapter}. 
\begin{figure}[H] \centering @@ -1235,15 +1311,40 @@ Behaviour of the login part of the page is expected to be identical to the behav \mess[1]{a}{$t_{client}$}{b} \mess[1]{b}{$t_{client},t_{server}$}{a} \end{sequencediagram} + \caption{Izaro client time synchronization} + \label{fig:client_time_proto} +\end{figure} + +\begin{figure}[H] + \centering + \begin{sequencediagram} + \newthread{a}{: DataServer} + \newinst[7]{b}{: CoordServer} + + \mess[1]{a}{$t_{client}$}{b} + \mess[1]{b}{$t_{client},t_{server}$}{a} + \mess[1]{a}{$t_{client},t_{server},t_{return},t_{round}$}{b} + \mess[1]{b}{$t_{client},t_{server},t_{return},t_{round},t_{sync}$}{a} + \end{sequencediagram} + \begin{enumerate} + \item $t_{client}$: Initial time as seen by the client + \item $t_{server}$: Time at reception of the first server packet as seen by the server + \item $t_{return}$: Time at reception of the first server reply by the data server + \item $t_{round}$: Time taken for a write round from the server + \item $t_{sync}$: An implementation defined calculated value derived from historical data from the protocol, e.g. 
+ + $t_{sync}=(\overline{t_{return}-t_{client}+t_{round}})\times 1.7$ + \end{enumerate} \caption{Izaro time synchronization} \label{fig:time_proto} \end{figure} + \begin{figure}[H] \centering \begin{sequencediagram} - \newthread{a}{: Client \#1} - \newinst[7]{b}{: Server} + \newthread{a}{: CoordServer} + \newinst[7]{b}{: DataServer} \mess[1]{a}{$t_{1}$ data write}{b} \mess[1]{b}{unconfirmed record}{a} @@ -1251,14 +1352,59 @@ Behaviour of the login part of the page is expected to be identical to the behav \end{callself} \mess[1]{a}{$t_{1} - t_{sync} < x < t_{2}$ unconfirmed read}{b} \mess[1]{b}{same unconfirmed record}{a} - \mess[1]{a}{confirm record}{b} - \mess[1]{b}{confirmed record}{a} + \mess[1]{a}{validate record}{b} + \mess[1]{b}{validated record}{a} \end{sequencediagram} - \caption{Izaro single user write confirmation} + \caption{Izaro single write confirmation} \label{fig:confirmation_proto} \end{figure} +\begin{figure}[H] + \centering + \begin{sequencediagram} + \newthread{a}{: Client} + \newinst[4]{b}{: CoordServer} + \newinst[4]{c}{: DataServer} + + \mess[1]{a}{try lock id}{b} + \begin{callself}{b}{check lock}{return success} + \end{callself} + \mess[1]{b}{log lock state id}{c} + \begin{callself}{b}{wait $t_{sync}$}{return;} + \end{callself} + \mess[1]{b}{return lock success}{a} + \prelevel\prelevel + \prelevel\prelevel + \begin{callself}{c}{save lock}{return;} + \end{callself} + \postlevel\postlevel + \postlevel + \end{sequencediagram} + \caption{Izaro synchronization information logging (success)} + \label{fig:synchro_log} +\end{figure} + +\begin{figure}[H] + \centering + \begin{sequencediagram} + \newthread{a}{: Client} + \newinst[4]{b}{: CoordServer} + \newinst[4]{c}{: DataServer} + + \mess[1]{a}{try lock id}{b} + \begin{callself}{b}{check lock}{return failure} + \end{callself} + \mess[1]{b}{log lock failure}{c} + \prelevel\prelevel + \mess[1]{b}{return lock failure}{a} + \begin{callself}{c}{log failure}{return;} + \end{callself} + 
\end{sequencediagram} + \caption{Izaro synchronization information logging (failure)} + \label{fig:synchro_log_fail} +\end{figure} + \begin{figure}[H] \centering \begin{footnotesize} @@ -1276,8 +1422,8 @@ Behaviour of the login part of the page is expected to be identical to the behav \mess[1]{da}{$t_{1} - t_{sync} < x < t_{2}$ unconfirmed read}{dc} \mess[1]{dc}{self unconfirmed record is first}{da} \postlevel - \mess[1]{da}{confirm record}{dc} - \mess[1]{dc}{confirmed record}{da} + \mess[1]{da}{validate record}{dc} + \mess[1]{dc}{validated record}{da} \prelevel\prelevel \prelevel\prelevel \prelevel\prelevel @@ -1301,31 +1447,1168 @@ Behaviour of the login part of the page is expected to be identical to the behav \end{sequencediagram} \end{footnotesize} - \caption{Izaro dual user write confirmation and cancellation} + \caption{Izaro two writes commit/cancellation decision} \label{fig:2user_confirmation_proto} \end{figure} +\begin{figure}[H] + \centering + \begin{minipage}[t][\textheight-2\fboxsep-2\fboxrule][t]{0.7\textwidth} + \begin{footnotesize} + \begin{sequencediagram} + \newinst{db}{: Client} + \newinst[4]{da}{: CoordServer} + \newinst[4]{dc}{: DataServer} + + \mess[1]{db}{send query}{da} + \mess[1]{da}{$t_{1}$ data write}{dc} + \mess[1]{dc}{unconfirmed record $A$}{da} + \prelevel + \prelevel + \begin{callself}{da}{wait $t_{sync}$}{return $t_{2}$} + \postlevel + \end{callself} + \mess[1]{da}{$t_{1} - t_{sync} < x < t_{2}$ unconfirmed read}{dc} + \mess[1]{dc}{self unconfirmed record is first}{da} + \mess[1]{da}{confirm record}{dc} + \mess[1]{dc}{confirmed record}{da} + \mess[1]{da}{wait continuation}{db} + \mess[1]{db}{send continuation query}{da} + \mess[1]{da}{$t_{3}$ data write}{dc} + \mess[1]{dc}{unconfirmed record $B$}{da} + \prelevel + \prelevel + \begin{callself}{da}{wait $t_{sync}$}{return $t_{4}$} + \postlevel + \end{callself} + \mess[1]{da}{$t_{1} - t_{sync} < x < t_{4}$ unconfirmed read}{dc} + \mess[1]{dc}{self unconfirmed record is first}{da} + 
\mess[1]{da}{commit record}{dc} + \mess[1]{dc}{committed record}{da} + \mess[1]{da}{return}{db} + \end{sequencediagram} + \end{footnotesize} + \end{minipage} + \caption{Izaro two queries confirmation and validation} + \label{fig:2query_confirmation_proto} +\end{figure} + \chapter{Storage layer} + \section{\texttt{izaro-storage}} -\section{\texttt{db\_{}stats}} + + \begin{itemize}[label={\Square}] + \item Receive a query + \item Route a query + \begin{itemize}[label={\Square}] + \item Route to read + \item Route to write + \item Route to confirm + \item Route to commit/validate + \item Route to remove + \item Route to test + \item Route to allocate + \item Route to initiate streaming + \item Route to log lock information + \end{itemize} + \item Preemptively perform time synchronization queries + \item Perform side-effect + \begin{itemize}[label={\Square}] + \item Write + \item Confirm + \item Commit/validate + \item Remove + \item Route to test + \item Allocate + \item Streaming initiation + \item Log lock information + \end{itemize} + \item Perform response + \begin{itemize}[label={\Square}] + \item Respond to read + \item Respond to write + \item Respond to confirm + \item Respond to commit/validate + \item Respond to remove + \item Respond to test + \item Respond to allocate + \item Respond to initiate streaming + \item Responses are cached for at least 1 second + \item Responses are handled as an outgoing queue + \end{itemize} + \end{itemize} + + \begin{itemize}[label={\Square}] + \item Can provide statistics given the statistics format + \item Statistics are readable as column data + \item Statistics item values start after the 16th byte of each line + \item Statistic items have units if applicable + \end{itemize} + + + + +\section{\texttt{gplib}: General Purpose POSIX based tools library} + \begin{itemize}[label={\Square}] + \item Managed file descriptor + \item Managed memory mapping + \item Managed stored array + \item Managed stored hash map + \item Managed 
stored indexed array + \item Regulated to big endian integer + \item Single Write Multiple Read adapter for containers + \item Region based MT adapter for containers + \end{itemize} + + \begin{figure}[h] + \centering + \begin{tikzpicture} + \begin{class}[text width=0.80\textwidth]{shared\string_fd}{0,14} + \attribute{- fd : int} + \attribute{- last\string_error : int} + \attribute{- guard : std::atomic\string_int*} + + \operation{+ shared\string_fd()} + \operation{- shared\string_fd( fd : int )} + \operation{+ \string~shared\string_fd()} + \operation{+ is\string_valid() : bool} + \operation{+ has\string_failed() : bool} + \operation{+ value() : int} + \operation{+ read( buffer : std::string\string_view ) : std::string\string_view} + \operation{+ write( buffer : std::string\string_view ) : std::string\string_view} + \operation{+ connect( addr : address )} + \operation{+ bind( addr : address )} + \operation{+ send( buffer : std::string\string_view, addr : address )} + \operation{+ receive( buffer : std::pair ) : std::string\string_view} + \operation{+ accept() : shared\string_fd} + \operation{\underline{+ create( filename : std::string\string_view, mode : int ) : shared\string_fd}} + \operation{\underline{+ open( filename : std::string\string_view, flags : int ) : shared\string_fd}} + \operation{\underline{+ socket( domain : int, proto : int, flags : int ) : shared\string_fd}} + \operation{\underline{+ unix\string_socket( proto : int ) : shared\string_fd}} + \operation{\underline{+ unix\string_socket\string_pair( proto : int ) : std::pair}} + \end{class} + + \begin{class}[text width=0.80\textwidth]{shared\string_mmap}{0,0} + \attribute{- off : size\string_t} + \attribute{- sz : size\string_t} + \attribute{- ptr : void*} + \attribute{- guard : std::atomic\string_int*} + + \operation{+ shared\string_mmap()} + \operation{+ shared\string_mmap( fd : shared\string_fd, size : size\string_t, offset : size\string_t )} + \operation{+ \string~shared\string_mmap()} + 
\operation{+ operator()$<$typename T$>$() : T*} + \operation{+ size() : size\string_t} + \operation{+ begin() : uint8\string_t*} + \operation{+ end() : uint8\string_t*} + \operation{+ offset() : size\string_t} + \operation{+ advise(adv\string_value)} + \end{class} + + \composition{shared\string_mmap}{fd}{1}{shared\string_fd} + \end{tikzpicture} + \caption{\texttt{gplib} shared\_{}fd and shared\_{}mmap} + \label{fig:gp_class_diagram_mmap} + \end{figure} + + \begin{figure}[h] + \centering + \begin{tikzpicture} + \begin{class}[text width=0.80\textwidth]{shared\string_fd}{0,6} + \end{class} + + \begin{class}[text width=0.80\textwidth]{shared\string_mmap}{0,3} + \end{class} + + \begin{class}[text width=0.80\textwidth]{stored\string_cyclic\string_queue}{0,0} + \attribute{+ $<<$T$>>$ : Type} + \attribute{+ $<<$is\string_mt\string_safe$>>$ : bool} + \attribute{- guard : std::atomic\string_int*} + + \operation{+ stored\string_cyclic\string_queue()} + \operation{+ \string~stored\string_cyclic\string_queue()} + \operation{\underline{+ create( filename, elem\string_count) : stored\string_cyclic\string_queue}} + \operation{\underline{+ open( filename : std::string\string_view ) : stored\string_cyclic\string_queue}} + \operation{+ push( elem : T )} + \operation{+ has\string_looped() : bool} + \operation{+ size() : size\string_t} + \operation{+ elem\string_size() : size\string_t} + \operation{+ begin() : iterator$<$T$>$} + \operation{+ end() : iterator$<$T$>$} + \end{class} + + \composition{shared\string_mmap}{fd}{1}{shared\string_fd} + \composition{stored\string_cyclic\string_queue}{metadata, data\string_buffer}{2}{shared\string_mmap} + \end{tikzpicture} + \caption{\texttt{gplib} stored\_{}cyclic\_{}queue} + \label{fig:gp_class_diagram_scq} + \end{figure} + + \begin{figure}[h] + \centering + \begin{tikzpicture} + \begin{class}[text width=0.80\textwidth]{shared\string_fd}{0,6} + \end{class} + + \begin{class}[text width=0.80\textwidth]{shared\string_mmap}{0,3} + \end{class} + + 
\begin{class}[text width=0.80\textwidth]{stored\string_indexed\string_array}{0,0} + \attribute{+ $<<$T$>>$ : Type} + \attribute{+ $<<$is\string_mt\string_safe$>>$ : bool} + \attribute{- guard : std::atomic\string_int*} + + \operation{+ stored\string_indexed\string_array()} + \operation{+ \string~stored\string_indexed\string_array()} + \operation{\underline{+ create( filename, elem\string_count) : stored\string_cyclic\string_queue}} + \operation{\underline{+ open( filename : std::string\string_view ) : stored\string_cyclic\string_queue}} + \operation{+ push( elem : T )} + \operation{+ pop(): T} + \operation{+ delete()} + \operation{+ size() : size\string_t} + \operation{+ elem\string_size() : size\string_t} + \operation{+ begin() : iterator$<$T$>$} + \operation{+ end() : iterator$<$T$>$} + \operation{+ operator[]( idx : size\string_t )} + \end{class} + + \composition{shared\string_mmap}{fd}{1}{shared\string_fd} + \composition{stored\string_indexed\string_array}{metadata, data\string_array, translation\string_array, reserve\string_array, delete\string_array}{5}{shared\string_mmap} + \end{tikzpicture} + \caption{\texttt{gplib} stored\_{}indexed\_{}array} + \label{fig:gp_class_diagram_sia} + \end{figure} + + \begin{figure}[h] + \centering + \begin{tikzpicture} + \begin{class}[text width=0.80\textwidth]{shared\string_fd}{0,6} + \end{class} + + \begin{class}[text width=0.80\textwidth]{shared\string_mmap}{0,3} + \end{class} + + \begin{class}[text width=1.03\textwidth]{stored\string_hash\string_map}{0,0} + \attribute{+ $<<$Tk$>>$ : Type} + \attribute{+ $<<$Tv$>>$ : Type} + \attribute{\underline{+ $<<$TPred$>>$ $=$ std::function$<$std::optional$<$Tr$>$(std::pair)$>$ : Type}} + \attribute{+ $<<$is\string_mt\string_safe$>>$ : bool} + \attribute{- guard : std::atomic\string_int*} + + \operation{+ stored\string_hash\string_map()} + \operation{+ \string~stored\string_hash\string_map()} + \operation{\underline{+ create( filename, elem\string_count, rec\string_count, 
del\string_count) : stored\string_cyclic\string_queue}} + \operation{\underline{+ open( filename : std::string\string_view ) : stored\string_cyclic\string_queue}} + \operation{+ insert( key : Tk, value : Tv )} + \operation{+ get( key : Tk ): Tv} + \operation{+ get\string_or( key : Tk, or\string_v : Tv ): Tv} + \operation{+ get\string_filter( key : Tk, lambda : std::function ): Tv} + \operation{+ for\string_each\string_backlog$<$Tr$>$( key : Tk, lambda : Pred ): Tr} + \operation{+ delete( key : Tk )} + \operation{+ size() : size\string_t} + \operation{+ elem\string_size() : size\string_t} + \operation{+ begin() : iterator$<$T$>$} + \operation{+ end() : iterator$<$T$>$} + \end{class} + + \composition{shared\string_mmap}{fd}{1}{shared\string_fd} + \composition{stored\string_hash\string_map}{metadata, data\string_array, record\string_array, delete\string_array}{4}{shared\string_mmap} + \end{tikzpicture} + \caption{\texttt{gplib} stored\_{}hash\_{}map} + \label{fig:gp_class_diagram_shm} + \end{figure} + + \begin{figure}[h] + \centering + \begin{tikzpicture} + \begin{class}[text width=0.90\textwidth]{regulated}{0,0} + \attribute{+ $<<$T$>>$ : Type} + \attribute{+ data : std::array$<$uint8\string_t, sizeof(T)$>$} + + \operation{+ regulated()} + \operation{+ regulated(value : T)} + \operation{+ \string~regulated()} + \operation{+ operator T() : T} + \end{class} + \end{tikzpicture} + \caption{\texttt{gplib} regulated to big endian value} + \label{fig:gp_class_diagram_regulated} + \end{figure} + + \begin{figure}[h] + \centering + \begin{tikzpicture} + \begin{class}[text width=0.30\textwidth]{shared\string_fd}{0,6} + \end{class} + + \begin{class}[text width=0.30\textwidth]{shared\string_mmap}{0,3} + \end{class} + + \begin{class}[text width=0.30\textwidth]{stored\string_cyclic\string_queue}{-6,3} + \end{class} + + \begin{class}[text width=0.30\textwidth]{stored\string_indexed\string_array}{6,3} + \end{class} + + \begin{class}[text 
width=0.30\textwidth]{stored\string_hash\string_map}{6,6} + \end{class} + + \composition{shared\string_mmap}{}{}{shared\string_fd} + \composition{stored\string_hash\string_map}{}{}{shared\string_mmap} + \composition{stored\string_indexed\string_array}{}{}{shared\string_mmap} + \composition{stored\string_cyclic\string_queue}{}{}{shared\string_mmap} + \end{tikzpicture} + \caption{\texttt{gplib} container classes} + \label{fig:gp_class_diagram_global} + \end{figure} \chapter{Coordination layer} \section{Client to \texttt{izaro-coordinate}} + +Queries are performed asynchronously; queries SHOULD be terminated by a specified relatively short timeout. Queries are composed of a sequence of operations ordered and executed with two (2) stacks of caches used to run them. A query is not performed until all of its operations are performed. + +One of the caches is the query cache, a stack of operations executed from top to bottom. + +The other cache is the data cache, onto which data is pushed or from which it is pulled. + +Queries can be sliced in multiple packets affixed with an index and a size. The reply to a yet incomplete query will always be an error until all parts are received. The error reply will list the missing parts of the query. The reply to an unfinished query will always be an error except if it is in the waiting state. + +Replies MAY also be split in a similar fashion as queries. The client can at any moment request an index for an incomplete reply. Errors only provide back the stack position of the query stack and of the data stack. + +The query stack only contains operations; the data stack contains tagged unions of record identifiers, records and pages. + +Both the query stack and the data stack MUST be stored in memory mapped temporary files to prevent exceeding live memory capacities in a way that allows denial of service. Those temporaries MUST be on a form of live disk. + +Writes pop a record identifier then they pop a page. 
+ +Reads push a page then a record identifier. + +Assertions expect a record identifier on top of the stack, but do not change it. + +Allocations push a record identifier. + +Large queries MAY be implemented to wait a larger timeout. + + \begin{itemize}[label={\Square}] + \item Receive a query + \item Provide a connection hook + \item Can read a raw connection block + \item Perform non query operations + \begin{itemize}[label={\Square}] + \item Perform a long term lock + \item Release a long term lock + \item Lock a guard + \item Unlock a guard + \item Lock on a named region lock + \item Unlock on a named region lock + \item Create a named region lock + \item Inflate named region lock + \item Deflate named region lock + \item Get named region lock statistics + \end{itemize} + \item Perform a query + \begin{itemize}[label={\Square}] + \item Preemptively perform time synchronization queries + \item Create a query stack + \item Prepare a query stack + \item Execute a query stack + \begin{itemize}[label={\Square}] + \item Assert timestamp, pop and write + \item Assert/pop record identifier + \item Assert/pop page + \item Allocate and push + \item Confirm and wait + \item Read and push + \item Pop and write + \item Pop and delete + \item Clear and push + \item Assert timestamp + \item Cancel + \item Commit + \item Query stack pop until size $n$ + \item Interweave $n$ elements with the $n$ next elements + \end{itemize} + \end{itemize} + \item Cleanup old data + \begin{itemize}[label={\Square}] + \item Cache + \item Timeout an old query + \item Incomplete query stack + \item List of past requests + \end{itemize} + \item Perform response + \begin{itemize}[label={\Square}] + \item Serialize a query stack state + \item Return a query stack state + \item Serialize a data stack state + \item Return a data stack state + \item Return a wait mark + \item Return a valid error + \end{itemize} + \item Keep track of user service usage + \begin{itemize}[label={\Square}] + \item Keep 
track of user write count + \item Keep track of user snapshots + \item Keep track of user delete count + \end{itemize} + \item Keep track of service activity + \begin{itemize}[label={\Square}] + \item Keep track of network usage + \item Keep track of non-storage operations per minute + \item Keep track of storage operations per minute + \item In case of low storage operations per minute + \begin{itemize}[label={\Square}] + \item Trigger a cleanup up to filling the deletion table to a given threshold or complete cleanup + \item Perform a replica clone if an extra server is available + \end{itemize} + \item In case of high non-storage operations per minute + \begin{itemize}[label={\Square}] + \item Request increasing the cyclic log size if the storage is planned to last less than a minute + \item Send a report of high usage of non-storage operations + \end{itemize} + \item Log activity reports + \item Track handlers on exit and log statistics + \end{itemize} + \end{itemize} + +Region locks have a name, a slicing and a size. You can lock any position within the size rapidly, locking a whole slice of the region. This is useful to lock parts of files. + +The size of slices is fixed. Inflating the region lock adds slices to it. Deflating the region lock removes slices from it if and only if they are not locked, otherwise it fails; a gradual deflation is advised unless you can expect huge slices to be unlocked. + +Coordination servers are responsible for slicing big data plans across data server constellations, trying to fit pages in all constellations equally, even if that means assigning new UUIDs to a user on allocating operations if needed. 
+ +\begin{figure}[h] + \centering + \begin{sequencediagram} + \newthread{a}{: CoordServer} + \newinst[4]{b}{: DataServer} + + \mess[1]{a}{Query}{b} + \begin{callself}{b}{Perform}{} + \end{callself} + \mess[1]{b}{Reply}{a} + \prelevel\prelevel + \prelevel\prelevel + \prelevel + \begin{callself}{a}{Timeout}{Results} + \postlevel\postlevel + \postlevel\postlevel + \end{callself} + \end{sequencediagram} + \caption{Successful query} + \label{fig:cli_to_coord_success} +\end{figure} + +\begin{figure}[H] + \centering + \begin{sequencediagram} + \newthread{a}{: Client} + \newinst[4]{b}{: Server} + + \mess[1]{a}{Query}{b} + \begin{callself}{b}{Perform}{} + \end{callself} + \prelevel\prelevel + \prelevel + \begin{callself}{a}{Timeout}{Error} + \postlevel\postlevel + \postlevel\postlevel + \end{callself} + \begin{sdblock}{loop}{while no results received} + \mess[1]{a}{Query with same id}{b} + \begin{callself}{b}{Perform}{Cached results} + \end{callself} + \mess[1]{b}{Reply?}{a} + \prelevel\prelevel + \prelevel\prelevel + \prelevel + \begin{callself}{a}{Timeout}{Results or Error} + \postlevel\postlevel + \postlevel\postlevel + \end{callself} + \end{sdblock} + \end{sequencediagram} + \caption{Delayed success query} + \label{fig:cli_to_coord_dsuccess} +\end{figure} + \section{\texttt{izaro-coordinate} to \texttt{izaro-storage}} -\chapter{Time synchronization} +A list of writes MUST be kept and checked for consistency of timestamps before confirmation. They MUST be stored in memory mapped temporary files to prevent exceeding live memory capacities in a way that allows denial of service. + +If your operation took more than $t_{sync}$ to perform, its confirmation MAY NOT be atomic and MAY result in a form of corruption. Any operation exceeding a threshold of operations advised by the server MAY have these side effects. It is advised these operations use either a regional form of locking or a per-entity form of locking performed by the client side in accordance with locking constraints. 
The server MUST guarantee loosely that any operation that takes less than the advised number of steps is atomic and sequentially consistent. + +This MAY imply that only a group of operations that takes less than $t_{sync}$ will be atomic. + +Any operation that will take less than $t_{sync}$ to perform MUST be atomic by design or fail. + +\section{Coordination layer high-availability} + +Each coordination server in a cluster gets attributed a number ID by the leader. This number ID must be greater than any number ID already in the cluster. The leader is always the node with the lowest number ID. Only the leader can perform locking operations, new node inclusion and replication orders. + +In case of partition, each node is to try to contact the node with the lowest ID and register it as its potential leader. A leader will emerge if a strict majority of the nodes refer to it as leader. If a node fails to obtain said majority within a defined time frame, this node is to disconnect from the cluster and wait for a leader with a lower ID to emerge, then seek a new number ID from that leader. 
+ +\section{\texttt{gplib}: General Purpose POSIX based tools library} + \begin{itemize}[label={\Square}] + \item Managed stored lock table + \item Managed stored circular log + \item Managed stored lock queue + \item Bloom-filter + \item Region locker + \end{itemize} + \begin{figure}[h] + \centering + \begin{tikzpicture} + \begin{class}[text width=0.20\textwidth]{shared\string_fd}{-1,6} + \end{class} + + \begin{class}[text width=0.20\textwidth]{shared\string_mmap}{-1,3} + \end{class} + + \begin{class}[text width=0.20\textwidth]{stored\string_hash\string_map}{-1,0} + \end{class} + + \begin{class}[text width=0.40\textwidth]{lock\string_id}{-11,-3} + \attribute{value : std::array$<$char, 4096$>$} + \end{class} + + \begin{class}[text width=0.20\textwidth]{lock\string_key}{-5,6} + \attribute{value : uint64\string_t} + \end{class} + + \begin{class}[text width=0.40\textwidth]{lock\string_tout}{-11,3} + \attribute{timestamp : regulated$<$uint64\string_t $>$} + \end{class} + + \begin{class}[text width=0.30\textwidth]{exclusivity\string_t}{-11,0} + \attribute{value : uint8\string_t} + \attribute{\underline{free = 0}} + \attribute{\underline{exclusive = 0xFF}} + \end{class} + + \begin{class}[text width=0.30\textwidth]{locktable}{-1,-3} + \attribute{\underline{Tk = lock\string_id}} + \attribute{\underline{Tv = lock\string_info}} + \inherit{stored\string_hash\string_map} + \end{class} + + \begin{class}[text width=0.20\textwidth]{lock\string_info}{-5,-1} + \end{class} + + \composition{shared\string_mmap}{}{}{shared\string_fd} + \aggregation{locktable}{$<<$Type$>>$}{}{lock\string_id} + \aggregation{locktable}{$<<$Type$>>$}{}{lock\string_info} + \composition{stored\string_hash\string_map}{}{}{shared\string_mmap} + + \composition{lock\string_info}{id}{}{lock\string_id} + \composition{lock\string_info}{key}{}{lock\string_key} + \composition{lock\string_info}{timeout}{}{lock\string_tout} + \composition{lock\string_info}{state}{}{exclusivity\string_t} + \end{tikzpicture} + 
\caption{\texttt{gplib} lock classes} + \label{fig:gp_class_diagram_locks} + \end{figure} + +\section{Communication between coordination servers} + +Each server is to hold a certain amount of data about the cluster: + +\begin{itemize} + \item Index ID + \item List of the other coordination servers and some information about them: + \begin{itemize} + \item Index ID + \item A contact target (tuple of a domain or IP address and a port) + \end{itemize} + \item Index ID of the leader +\end{itemize} + +Any server with a lower Index ID (also referred as number ID) has execution authority on the servers below it. + +\subsection{Leadership} + +In a normal state, the leader is a server accessible from any coordination server with the lowest number ID. The leader is responsible for all locking operations within the cluster and for streaming the logs of those operations to be able to reconstruct the locking state of the cluster as accurately as possible in case of failure. + +In case the leader loses access to a majority of the connected servers during a time period defined in its configuration, the leader is to announce its drop of leadership and the cluster enters a degraded state. + +In degraded state, no lock or server replacement operation can be performed. Normal operations can still be performed for a configuration defined time in which it is considered the shared time state has not been degraded. In that state, the servers are to perform a partition resolution. + +\subsection{Partition resolution} + +In partition resolution mode, every server is to try to contact servers by Index ID order, and assign and send a partition resolution value that is dependent on how low the Index ID of the target server is in the list of reachable servers ordered by Index ID. If that server has a majority of servers that can reach it with a partition resolution value of 1, it is elected leader. 
If it doesn't, but has a majority when counting partition resolution values of both 1 and 2, it gets elected leader. + +\begin{figure}[H] + \centering + \begin{sequencediagram} + \newinst{a}{: Server \#2} + \newinst[3]{b}{: Server \#3} + \newinst[3]{c}{: Server \#4} + + \mess[2]{b}{connect}{a} + \prelevel\prelevel\prelevel + \mess[2]{c}{connect}{a} + + \mess[2]{b}{$PTR=1$}{a} + \prelevel\prelevel\prelevel + \mess[2]{c}{$PTR=1$}{a} + + \begin{callself}{a}{Majority of $PTR=1$}{Set as leader} + \end{callself} + \mess[3]{a}{$Leader=2$}{b} + \prelevel\prelevel\prelevel\prelevel + \mess[3]{a}{$Leader=2$}{c} + \end{sequencediagram} + \caption{Best case PTR} + \label{fig:best_case_ptr} +\end{figure} + +Between every step, there must be a synchronization between parties that managed to communicate. + +\begin{table}[H] +\centering + \begin{tabular}{|c|c|c|c|c|} + \hline + & \#2 & \#3 & \#4 & \#5 \\ + \hline + \#2 & X & X & X & X \\ + \hline + \#3 & 1 & X & X & X \\ + \hline + \#4 & 0 & 1 & X & X \\ + \hline + \#5 & 0 & 1 & 1 & X \\ + \hline + \end{tabular}\\ + \caption{Network availability table} + \begin{tabular}{|l|l|l|l|} + \hline + \#2 & \#3 & \#4 & \#5 \\ + \hline + L=0 & L=0 & L=0 & L=0 \\ + \hline + L=1 & L=0 & L=0 & L=0 \\ + \hline + L=1 PTR(3$\rightarrow{}$1) & L=0 Exp \#2 & L=0 Exp \#2 & L=0 Exp \#2 \\ + \hline + L=0 PTR(3$\rightarrow{}$1) Exp \#3 & L=1 PTR(3$\rightarrow{}$1, 4$\rightarrow{}$1, 5$\rightarrow{}$1) & L=0 Exp \#3 & L=0 Exp \#3 \\ + \hline + L=0 Rejected, Bad ID & L=1 PTR(2$\rightarrow{}$2, 4$\rightarrow{}$1, 5$\rightarrow{}$1) & L=0 & L=0 \\ + \hline + L=0 Reset & L=1 PTR(2$\rightarrow{}$2, 4$\rightarrow{}$1, 5$\rightarrow{}$1) & L=0 & L=0 \\ + \hline + L=0 Index changed to \#6 & L=1 PTR(6$\rightarrow{}$2, 4$\rightarrow{}$1, 5$\rightarrow{}$1) & L=0 & L=0 \\ + \hline + L=0 & L=1 & L=0 & L=0 \\ + \hline + \end{tabular} + \caption{PTR workflow} + \label{tab:ptr_workflow} +\end{table} + +\section{Communication between coordination servers} + +Communication between 
coordination servers can be done either in anonymous or identified mode; in anonymous mode, the Index ID sent in those requests is $0$. $0$ is otherwise an illegal Index ID. + +Anonymous connections are the following operations: + +\begin{itemize} + \item Connection as a new member + \item ID Reset announcement after falling behind in Index ID over + \item Querying the currently known leadership +\end{itemize} + +The Index ID works as a header. Its format is a one-byte size field, followed by a multi-precision big-endian unsigned integer of up to 256 bytes. + +Identified connections are the following operations: + +\begin{itemize} + \item Querying a heartbeat from the leader + \item Querying the full cluster information + \item Sending lock information for safekeeping + \item Sending back-end cluster change updates +\end{itemize} + +\section{$A/B/AB$ fragments} + +Let $X$ be a number writable using $262144$ binary digits. Let's represent that number as $X={X_{high}X_{low}}_{2^{17}}$. + +The $A$ part of $X$, written $A(X)$, is equal to $X_{high}$. The $B$ part of $X$, written $B(X)$, is equal to $X_{low}$. + +The $AB$ part is the base $2^6$ digit-wise modulo addition of $A$ and $B$, written as $(A+B)(X)$. + +\section{Ownership of read-only blocks} + +Ownership of blocks is stored at a global level, following the same idea as payments. + +Ownerships are the combination of a Block UUID, a User ID, a read flag and a write flag. Creation ownership is enforced above all as a read-write relationship. This means that the list of ownerships is not tested when one is the owner of a block. + +\chapter{Time synchronization}\label{time_sync_chapter} + \section{Steadiness requirement} + +The time is directed by the coordination server. Each server must synchronize to that time. + +The time is never guaranteed to be the real time, but it is guaranteed that any time query return value will be strictly greater than the highest recorded time of the server. 
When the system is running in normal or degraded state, the unit of the subtraction of timestamps is nanoseconds, else it is undefined. + \section{Storage side requirement} +Storage side MUST store its natural clock offset and the actual server offset as obtained per the time synchronization protocol. + +Along with the server time, a value MUST be provided, named $t_{sync}$, representing the time required for the server to communicate with all storages summed with the reply time of the slowest storage to copy back. + +$t_{sync}$ MAY be calculated pessimistically to provide atomicity for longer operations. + +Storage side MUST refuse any transaction that would write a node earlier in time than a node in the same record chain. + +Storage side MAY free any non-protected record that is labeled after $t_{last}$. + +\begin{center} +\label{time_sync_equation} +\begin{minipage}{0.6\textwidth} +$t_{last} = t_{global} + t_{sync}$ +\\\\ +$t_{global} = t_{client} + t_{offset}$ +\\\\ +with~~$t_{offset} = \overline{t_{client}-t_{server}+\left(\frac{t_{return} - t_{client}}{2}\right)}$ +\\\\ +$\overline{n}$ being the average recorded value of $n$. + +$t_{client}$, $t_{server}$, $t_{return}$ being as defined on page \pageref{fig:time_proto}. +\end{minipage} +\end{center} + +Storage servers MUST be able to provide their last registered timestamp. + \chapter{Client side} + +It is advised that all of the primitives used for encryption and random number generation are copies of the primitives from the OpenBSD project wherever possible, from other public domain projects otherwise. They are to be contained in a separate library dynamically linked with the client. They are to only operate on memory allocated through a randomized layout memory allocator and blurred in randomized data. 
+ \section{Key blocks and system root layout} -\section{Command line user interface} -\section{Graphical user interface} + +Key blocks are a tuple of a Block ID, a cipher identifier, and a root position in the Block ID. A cipher identifier is a list of tuples, each containing a cipher identifier followed by an appropriately sized key. + +The ciphers in a key block are listed in the order in which they are used for encryption of a data point. + +The system root layout is composed of a sequence of authentication headers, each of them containing a token that is required to prove ownership of a key to the authentication system. + +It is also composed of a separate sequence containing encrypted key blocks. Both sequences have to be refreshed together at each authentication. + +Storage of an additional initialization vector is possible. Both sequences are separate entities but are considered to be following each other, the authentication one first followed by the key sequence. No padding should be inserted between them. + +\section{Authentication} + +Along with the key blocks and authentication headers, the system also contains a space to store a nonce. Any multi-precision integer of up to 256 bytes can be stored in that space. + +Transmission of that number is done as a hexadecimal number in a null-terminated string. 
+ +Whatever the authentication mode, the workflow is the following: +\begin{enumerate} + \item A websocket over SSL connection is established + \item The user provides its User ID, blocking authentications for that user for 30 seconds + \item The server provides back the user's authentication header and the nonce as a JSON document + \begin{itemize} + \item[\texttt{"auth\string_head"}:] the authentication headers as a byte array + \item[\texttt{"nonce"}:] the nonce as an hexadecimal string + \end{itemize} + \item The user deciphers the authentication headers, then prepend a new authentication header + \item The user then sends the deciphered token, as well as the new authentication header and the new nonce + \item If the server reads the same token as the deciphered token, then the authentication headers, the token and a nonce are replaced. The servers then transmits an authentication token +\end{enumerate} + + + +\begin{figure}[H] + \centering + \begin{bytefield}[bitwidth=0.13em]{192} + + \bitheader{0,7,31,63,71,95} \\ + \bitbox[lrtb]{8}{\texttt{s}} + \bitbox[tr]{184}{}\\ + \wordbox[lr]{1}{\texttt{Forward randomized padding of length s bytes}}\\ + \skippedwords \\\wordbox[lbr]{1}{}\\ + \bitbox[rl]{64}{\texttt{Root count}} + \bitbox[rl]{32}{\texttt{token sz}} + \bitbox[rl]{96}{\texttt{Authentication token}}\\ + \begin{rightwordgroup}{this element is repeated\\for every root} + \wordbox[ltr]{1}{\texttt{record identifier}}\\ + \bitbox[lrt]{8}{\texttt{n}} + \bitbox[rt]{184}{\texttt{root name}}\\ + \bitbox[lrt]{8}{\texttt{m}} + \bitbox[rt]{184}{\texttt{root metadata}}\\ + \bitbox[lrt]{8}{\texttt{c}} + \bitbox[rt]{64}{\texttt{cipher id}} + \bitbox[rt]{120}{\texttt{KEY}}\\ + \bitbox[lrt]{64}{\texttt{cipher id}} + \bitbox[rt]{128}{\texttt{KEY}}\\ + \bitbox[lrt]{64}{\texttt{cipher id}} + \bitbox[rt]{128}{\texttt{KEY}} + \end{rightwordgroup}\\ + \wordbox[ltr]{1}{\vspace{0.96em}\texttt{\ldots}} \\ + \skippedwords \\\wordbox[lrb]{1}{} + \end{bytefield} + 
\caption{Authentication header layout} + \label{fig:authheader} +\end{figure} + +In \autoref{fig:authheader}, the \texttt{cipher id} corresponds to a specifically attributed identifier for each supported cipher. The identifiers are specified as big-endian 64-bit integers. They are associated with the related key size. Identifiers with a most significant byte set to 0 are explicitly unused and are free for any user-defined primitives. The other variables respectively refer to: +\begin{itemize} +\item[\texttt{n}:] length of the \texttt{root name} +\item[\texttt{m}:] length of the \texttt{root metadata} +\item[\texttt{c}:] number of ciphers in the cryptographic pipeline +\end{itemize} + +\subsection{Password mode} + +In password mode, the password gets blurred into an expanded key. This expanded key, along with the nonce, encrypts the authentication header. + +This expansion is to use PBKDF2/HMAC-SHA1 with the user name as the salt. + +\subsection{One-Time Pad key mode} + +In one-time pad mode, the nonce corresponds to the offset from the start of the pad. + +The pad information is stored in a complete directory. The directory is expected to have the following structure: + +\begin{figure}[H] + \centering + \begin{minipage}{0.72\textwidth} + \begin{itemize} + \item[\texttt{/pad}:] A file containing raw high-entropy data to use as a one-time pad + \item[\texttt{/userinfo.json}:] configuration information for the authentication, including the user name and any metadata (name and contents of certain blocks for example). 
+ \end{itemize} + \end{minipage} + \caption{Directory structure of an authentication device} + \label{fig:pad_auth_structure} +\end{figure} + +\section{Key generation} +\subsection{Random data sources} + +We envision providing cheap entropy sources that can be used with commodity laptops and with cheap hardware: + +\begin{itemize} + \item microphones + \item webcams + \item mouse movements + \item radio noise +\end{itemize} + +For all of those signals, only the least significant bits will be used as entropy sources, meaning that whatever happens, only small amounts of entropy will be gathered. Multiple sources could even be used in conjunction. + +The data from those sources will be expanded into the one-time pad used for authentication. + +\subsection{Random data expansion} + +TODO: examine the usability of a sliding key RC6 implementation in templated C++ with arbitrary $w$, $r$ and $k$ values. + +\section{Library} +\subsection{Authentication API} +\subsection{Storage substructures API} +\subsubsection{\texttt{rstring}} + +Recursive strings (or \texttt{rstring}) are persistent data structures. They represent an expandable span of characters through a recursive tree of pages. This recursive string can be block-specific or block-agnostic. Block-specific recursive strings are not advised for any string that may grow longer than 4GB, corresponding to a recursion level of 2 for a block-specific string; they would, up to 147GB, correspond to a recursion level of 3 in block-agnostic mode. 
+ +\begin{figure}[H] + \centering + \begin{tabular}{|r|r|r|r|r|} + \hline + Recursion level & Near & Far & Page count (Near) & Page count(Far)\\ + \hline + 0 & 32576B & 32576B & 1 & 1\\ + \hline + 1 & 15MiB & 5MiB & 510 & 170\\ + \hline + 2 & 7.8GiB & 0.8GiB & 259'590 & 28'731\\ + \hline + 3 & 3.9TiB & 149GiB & 132'131'819 & 4'855'540\\ + \hline + 4 & 1.94PiB & 24TiB & 67'255'096'380 & 820'586'261\\ + \hline + 5 & 988PiB & 3.9PiB & 34'232'844'057'929 & 138'679'078'110\\ + \hline + \end{tabular} + \caption{\texttt{rstring} maximum capacity by recursion level} + \label{tab:gp_rstring_sizes} +\end{figure} + +Conversion from block specific string to a block agnostic string is possible, leaving the originally written data in its former block. + +\begin{table}[H] + \centering + \begin{tikzpicture} + \begin{class}[text width=0.90\textwidth]{rstring}{0,0} + \attribute{+ $<<$sz$>>$ : size\string_t} + \attribute{+ $<<$mode = no\_{}assert$>>$ : mode\string_t} + \attribute{+ fptr : gp::far\string_ptr} + + \operation{+ rstring(root : gp::far\string_ptr)} + \operation{+ rstring(root : gp::far\string_ptr, nonce\string_gen : function)} + \operation{+ read(position : size\string_t, length : size\string_t)} + \operation{+ write(position : size\string_t, data : std::string\string_view)} + \operation{+ append(data : std::string\string_view)} + \operation{+ \string~rstring()} + \end{class} + \end{tikzpicture} + \caption{\texttt{gplib} class diagram for \texttt{rstring} in deprived construction} + \label{fig:gp_class_diagram_rstring_noassert} +\end{table} + +\begin{figure}[H] + \centering + \begin{tikzpicture} + \begin{class}[text width=0.90\textwidth]{rstring}{0,0} + \attribute{+ $<<$mode = can\_{}assert$>>$ : mode\string_t} + \attribute{+ fptr : gp::far\string_ptr} + \attribute{+ lvl\string_value : uint64\string_t} + \attribute{+ lvl\string_cache : gp::rstring$<$can\_{}cache$>$} + + \operation{+ rstring(root : gp::far\string_ptr)} + \operation{+ rstring(root : gp::far\string_ptr, 
nonce\string_gen : function)} + \operation{+ read(position : size\string_t, length : size\string_t)} + \operation{+ write(position : size\string_t, data : std::string\string_view)} + \operation{+ append(data : std::string\string_view)} + \operation{+ \string~rstring()} + \end{class} + \end{tikzpicture} + \caption{\texttt{gplib} class diagram for \texttt{rstring} in asserting construction} + \label{fig:gp_class_diagram_rstring_assert} +\end{figure} + +\begin{figure}[H] + \centering + \begin{tikzpicture} + \begin{class}[text width=0.90\textwidth]{rstring}{0,0} + \attribute{+ $<<$mode = exclusive$>>$ : mode\string_t} + \attribute{+ fptr : gp::far\string_ptr} + \attribute{+ lvl\string_value : uint64\string_t} + \attribute{+ lvl\string_cache : gp::rstring$<$exclusive$>$} + + \operation{+ rstring(root : gp::far\string_ptr)} + \operation{+ rstring(root : gp::far\string_ptr, nonce\string_gen : function)} + \operation{+ read(position : size\string_t, length : size\string_t)} + \operation{+ write(position : size\string_t, data : std::string\string_view)} + \operation{+ append(data : std::string\string_view)} + \operation{+ operator[](position : size\_{}t) : gp::pseudo\string_{}ptr} + \operation{+ \string~rstring()} + \end{class} + \end{tikzpicture} + \caption{\texttt{gplib} class diagram for \texttt{rstring} in exclusive construction} + \label{fig:gp_class_diagram_rstring_exclusive} +\end{figure} + + +\begin{figure}[H] + \centering + \begin{bytefield}[bitwidth=0.13em]{192} + + \bitheader{0,31,63,95,127,159,191} \\ + \bitbox[lrt]{32}{\texttt{level}} + \bitbox[lrt]{32}{\texttt{size}} + \bitbox[lrt]{8}{\texttt{1}} + \bitbox[lrt]{120}{\texttt{Unused}}\\ + \bitbox[lrt]{128}{\texttt{ri\string_uuid}} + \bitbox[lrt]{32}{\texttt{ri\string_x}} + \bitbox[lrt]{32}{\texttt{ri\string_y}} \\ + \wordbox{1}{\texttt{request\string_identifier}} \\ + \bitbox[lrt]{192}{} \\ + \wordbox[lr]{1}{\vspace{0.96em}\texttt{\ldots}} \\ + \skippedwords \\\wordbox[lrb]{1}{} + \end{bytefield} + 
\caption{Non-zero level far pointer \texttt{rstring}} + \label{fig:nzfprstring} +\end{figure} + +\begin{figure}[H] + \centering + \begin{bytefield}[bitwidth=0.13em]{192} + + \bitheader{0,31,63,95,127,159,191} \\ + \bitbox[lrt]{32}{\texttt{level}} + \bitbox[lrt]{32}{\texttt{size}} + \bitbox[lrt]{8}{\texttt{0}} + \bitbox[lrt]{120}{\texttt{Unused}}\\ + \bitbox[lrt]{128}{\texttt{ri\string_uuid}} + \bitbox[lrt]{32}{\texttt{ri\string_x}} + \bitbox[lrt]{32}{\texttt{ri\string_y}} \\ + \bitbox[lrt]{32}{\texttt{ri\string_x}} + \bitbox[lrt]{32}{\texttt{ri\string_y}} + \bitbox[lrt]{64}{\texttt{ri\string_coords}} + \bitbox[lrt]{64}{\texttt{ri\string_coords}} \\ + \bitbox[lrt]{192}{} \\ + \wordbox[lr]{1}{\vspace{0.96em}\texttt{\ldots}} \\ + \skippedwords \\\wordbox[lrb]{1}{} + \end{bytefield} + \caption{Non-zero level near pointer \texttt{rstring}} + \label{fig:nznprstring} +\end{figure} + +\begin{figure}[H] + \centering + \begin{bytefield}[bitwidth=0.13em]{192} + \bitheader{0,31,63,95,127,159,191} \\ + \bitbox[lrtb]{32}{\texttt{0}} + \bitbox[lrtb]{32}{\texttt{size}} + \bitbox[lrt]{128}{\texttt{}}\\ + \bitbox[lr]{192}{} \\ + \wordbox[lr]{1}{\vspace{0.96em}\texttt{data}} \\ + \skippedwords \\\wordbox[lrb]{1}{} + \end{bytefield} + \caption{Zero level \texttt{rstring}} + \label{fig:zrstring} +\end{figure} + +\subsubsection{\texttt{rstring\string_table}} + + + +\begin{figure}[H] + \centering + \begin{bytefield}[bitwidth=0.60em]{64} + \bitheader{0-3,31,63} \\ + \bitbox[lrt]{1}{\texttt{0}} + \bitbox[lrt]{1}{\texttt{0}} + \bitbox[lrt]{1}{\texttt{F}} + \bitbox[lrt]{1}{\texttt{G}} + \bitbox[lrt]{60}{\texttt{}}\\ + \bitbox[lrtb]{64}{\texttt{near\string_ptr}} + \end{bytefield} + \vspace{0.5em} + + \begin{bytefield}[bitwidth=0.60em]{64} + \bitheader{0-3,31,63} \\ + \bitbox[lrtb]{1}{\texttt{1}} + \bitbox[lrtb]{1}{\texttt{0}} + \bitbox[lrt]{1}{\texttt{F}} + \bitbox[lrt]{1}{\texttt{G}} + \bitbox[lrt]{60}{\texttt{}}\\ + \wordbox[lrtb]{3}{\texttt{far\string_ptr}} + \end{bytefield} + + 
\begin{itemize} + \item[\texttt{F}:] if set, \texttt{T} is a \texttt{hashseqfar}, else it is an \texttt{hashseqnear} + \item[\texttt{G}:] if set, the table uses \texttt{pointfar} as a first underlying structure, else it uses a \texttt{pointnear} + \end{itemize} + \caption{\texttt{rstring} table root} + \label{fig:rstringtable} +\end{figure} + + +\begin{figure}[H] + \centering + \begin{bytefield}[bitwidth=0.26em]{144} + \bitheader{0,31,63,95,127,144} \\ + \bitbox[lrtb]{64}{\texttt{hash}} + \bitbox[lrtb]{64}{\texttt{near\string_ptr}} + \bitbox[lrtb]{16}{\texttt{ll}}\\ + \bitbox[lr]{144}{\texttt{\ldots{}}} + \wordbox[lr]{1}{\vspace{0.96em}} \\ + \skippedwords \\\wordbox[lrb]{1}{} + \end{bytefield} + \caption{\texttt{hashseqnear} substructure} + \label{fig:hashseqnear} +\end{figure} + +\begin{figure}[H] + \centering + \begin{bytefield}[bitwidth=0.13em]{272} + \bitheader{0,31,63,95,127,159,191,223,255,271} \\ + \bitbox[lrtb]{64}{\texttt{hash}} + \bitbox[lrtb]{192}{\texttt{far\string_ptr}} + \bitbox[lrtb]{16}{\texttt{ll}}\\ + \bitbox[lr]{272}{\texttt{\ldots{}}} + \wordbox[lr]{1}{\vspace{0.96em}} \\ + \skippedwords \\\wordbox[lrb]{1}{} + \end{bytefield} + \caption{\texttt{hashseqfar} substructure} + \label{fig:hashseqfar} +\end{figure} + + +\begin{figure}[H] + \centering + \begin{bytefield}[bitwidth=0.44em]{80} + \bitheader{0,31,63,79} \\ + \bitbox[lrt]{64}{\texttt{near\string_ptr}} + \bitbox[lrt]{16}{\texttt{sz}}\\ + \bitbox[tlr]{80}{\texttt{key data\ldots{}}}\\ + \skippedwords \\ + \bitbox[lrb]{16}{} + \bitbox[tlrb]{64}{\texttt{U}} + \end{bytefield} + \caption{\texttt{pointnear} substructure} + \label{fig:pointnear} +\end{figure} + +\begin{figure}[H] + \centering + \begin{bytefield}[bitwidth=0.44em]{80} + \bitheader{0,31,63,79} \\ + \wordbox[lrt]{2}{\texttt{far\string_ptr}}\\ + \bitbox[lrb]{32}{} + \bitbox[lrbt]{16}{\texttt{sz}} + \bitbox[lrt]{32}{\texttt{}}\\ + \bitbox[lr]{80}{\texttt{key data\ldots{}}}\\ + \skippedwords \\ + \bitbox[lrb]{48}{} + \bitbox[tlr]{32}{}\\ + 
\wordbox[lrb]{2}{\texttt{U}} + \end{bytefield} + \caption{\texttt{pointfar} substructure} + \label{fig:pointfar} +\end{figure} + +\texttt{U} is either a pointer to the rest of the key information in a \texttt{rstring} or a continuation of the data if the \texttt{sz} is above the addressable size. + +\subsection{Cryptographic API} +\subsubsection{Cryptographic pipelines} + +Cryptographic pipelines are a means to stack encryptions and keys. Their implementation is a sequence of virtual page-encrypter objects, each holding its specific key. Cryptographic pipelines must offer a solution to clean their data up before deletion. + +Cryptographic pipelines are expected to expose a function \texttt{decrypt} and a function \texttt{encrypt}, both taking a reference to a page, a record identifier and a \texttt{Maybe(UInt64)} monad or similar type. Neither the \texttt{encrypt} nor the \texttt{decrypt} function shall be able to fail with an exception. They should always leave the page in a valid state such that: \begin{itemize} + \item[] $encrypt(decrypt(P_a,r_i,Maybe(n)),r_i,Maybe(n))=P_a$ + \item[] $decrypt(encrypt(P_a,r_i,Maybe(n)),r_i,Maybe(n))=P_a$ +\end{itemize} + + $P_a$ being an arbitrary page, $r_i$ any record identifier, and $Maybe(n)$ a monadic value representing an integer $n$ or its absence. + +Cryptographic pipelines are to always be declared in decryption order. + +\subsection{Distributed hash table} + +The hash table used is a \texttt{byte-ll} hash table using a pointer that is either a 64-bit near pointer or a 192-bit far pointer. + +The record part of the table is a fully allocated \texttt{rstring\string_table} root containing a pointer to a \texttt{hashseqnear}/\texttt{hashseqfar} as appropriate per their definitions. Each key/value pair is a separate \texttt{pointnear}/\texttt{pointfar} structure pointing to a \texttt{rstring} of the data. 
+ +\subsection{Distributed block} + +Distributed blocks can be allowed to be cached on read, if they are, then they are locked and the lock are refreshed on a regular basis. + +Distributed blocks are a special implementation of a \texttt{rstring}, they are to be used directly in the locked mode as a block storage system. \chapter{Block storage system} +The block storage system is an explicit case of using the distributed block system, with the associated locking. It is redimensionable to bigger sizes. It is allocated using the smallest cached block mode required. + +Blocks are composed of a recursive construct of far pointers to full database pages. + +\section{Header Block definition} + + + \chapter{Native file system} +\section{Header Block definition} + +\section{Properties} +\subsection{Limits} +\subsection{Permissions} +\subsubsection{NT ACL} +\subsubsection{Unix users} +\subsubsection{ACL2Unix} +\section{Caution advised} +\subsection{Client omnipotence} +\subsection{Vulnerabilities by design} + \appendix \part{Annexes} @@ -1357,7 +2640,39 @@ Compactness of a cipher means that if you encrypt a message of side $n$ you will for example, let's consider a simple cipher: for a message $A$, read it as a number and multiply it with a value that will be the key. -If your message is for example 8 digits, like $00005555$ and the key is $12345678$, the cipher-text will be equal to $5555 \times 12345678 = 68580241290$ which make a 11 digits cipher-text from a 8 digit message. +If your message is for example 8 digits, like $00005555$ and the key is $12345678$, the cipher-text will be equal to $5555 \times 12345678 = 68580241290$ which make a 11 digits cipher-text from a 8 digit message, and hence make the transformation non compact. + +An example of compact transformation is the truncated addition, also named modulo addition, used by systems like one-time pads. 
+ +\subsection{Forward secrecy} + +Forward secrecy is the property of an encryption system to protect parts of the messages even when some of them were compromised. The typical use-case is, for example, to prevent decryption of a message if the message before that was compromised. + +Forward secrecy is very important for messaging systems as it is suited to the fact that the key may be changed quite often, or that the key is not sufficient to break the encryption. + +\subsection{Durability of secrecy} + +Durability of an encryption system depends on two main factors: the resistance, all flaws taken into consideration, of the encryption scheme and the evolution of computers and their accessibility in the future. + +You can put it in the terms of encryption having an expiration date. Past this time, someone who started decrypting your data immediately after you encrypted it may have deciphered it. + +\subsection{Encryption flaws/Cryptanalysis} + +Some encryption systems have known flaws. Flaws in cryptography can either render the encryption obsolete or lower the work required to find a key. They always affect the durability of the secrecy at different scales. + +For example, some flaws of the AES algorithm make it several thousand times faster to break. A complete brute-force attack is believed to take $3\times{}10^{51}$ years at most for the AES-256 variant with 50 surrealistic supercomputers able to compute a billion billion keys per second. With that, it is clear that breaking it, even with an advantage due to flaws, is not realistic in a human lifetime. + +Some other flaws completely compromise cryptographic systems, or are predicted to theoretically compromise them in the years to come. An example of that is asymmetric cryptography based on prime number factorization, currently threatened by quantum computing. + +Research into flaws in cryptographic systems is called cryptanalysis. 
+ +\subsection{Side channel attacks} + +Side-channel attacks refer to attacks on a cryptographic system that do not affect the way the system is designed but the way it is implemented. For example, using the sound made by electric current in a CPU have been used against some implementations of the OpenSSL library to deduce part of the key that was being used. + +It is very hard to predict side channel attacks, and just as hard to prevent them. + +A typical mitigation is for example to ensure all cryptographic operations take a constant amount of time. This prevents a typical attack called a time-based side channel attack. \subsection{Homomorphism} @@ -1376,20 +2691,20 @@ Symmetrical encryption aims to encrypt data on a two way channel. The key allows \begin{figure}[h] \begin{center} \begin{itemize} - \item AES + \item Rijndael \item Chacha20 \item Blowfish \item Serpent \item Twofish \item CAST5 - \item RC4 + \item RC4 and RC6 \item DES \item 3DES \item Skipjack \item IDEA \end{itemize} \end{center} - \caption{List of symmetrical ciphers} + \caption{List of symmetrical ciphers (non-exhaustive)} \label{fig:sym_ciphers} \end{figure} @@ -1408,7 +2723,7 @@ The goal of asymmetrical encryption is to provide ways to authenticate messages, \item Lattice based (NTRU, BLISS\autocite{Gentry:2009:FHE:1834954}) \end{itemize} \end{center} - \caption{List of asymmetrical ciphers} + \caption{List of asymmetrical ciphers (non-exhaustive)} \label{fig:asym_ciphers} \end{figure} @@ -1416,6 +2731,14 @@ It is evolving a lot nowadays as the most used algorithms are not extremely resi This kind of cryptographic systems are generally used to exchange keys for symmetrical encryption. +\subsection{One-Time Pads} + +One time pads are a cryptography technique that suppose both sides of a communication own a shared pad at least the size of the data to encrypt. Elements of the pad are added with a modulo addition to the plain-text to generate the cipher-text. 
+ +There is no known way to decipher the data without the pad, making One-Time Pad (or OTP, not to be mixed with One-Time Passwords) the safest cryptographic scheme, albeit an unrealistic one for large amounts of data. + +It also relies on a different channel to transmit the pad in case of communications, yet doesn't in the case of storage. + \backmatter %---------------------------------------------------------------------------------------- diff --git a/whitepaper/structure.tex b/whitepaper/structure.tex index b1bc7f4..6c0f73c 100755 --- a/whitepaper/structure.tex +++ b/whitepaper/structure.tex @@ -501,7 +501,7 @@ innerbottommargin=5pt]{cBox} % LINKS %---------------------------------------------------------------------------------------- -\usepackage{hyperref} +\usepackage[hidelinks]{hyperref} %\hypersetup{hidelinks,backref=true,pagebackref=true,colorlinks=false,breaklinks=true,urlcolor=ocre,bookmarks=true,bookmarksopen=false} \usepackage{bookmark}