Skip to content

Computations for Memory Overhead Required for LZMA, ZPAQ, and BZIP3

Peter Hyman edited this page Nov 2, 2023 · 1 revision

Memory Overhead Computation

An important change in lrzip-next was the revision of the way that block sizes for threads are computed. lrzip favored maximizing the number of threads, but at the expense of the size of the blocks to be compressed. With the addition of the ability to set dictionary sizes, I wanted to be able to maximize dictionary size and block size even if it meant using fewer threads. I discovered that the lzma backend can and will use multiple threads on its own, regardless of lrzip's setting. Also, as in the open_stream_out() function, overhead never needs to be recomputed, even for multiple chunks.

from util.c

115 void setup_overhead(rzip_control *control)
116 {
117         /* Work out the compression overhead per compression thread for the
118          * compression back-ends that need a lot of ram
119          * and set Dictionary size */
120         if (LZMA_COMPRESS) {
121                 if (control->dictSize == 0) {
122                         switch (control->compression_level) {
123                         case 1:
124                         case 2:
125                         case 3: control->dictSize = (1 << (control->compression_level * 2 + 16));
126                                 break; // 256KB to 4MB
127                         case 4:
128                         case 5:
129                         case 6: control->dictSize = (1 << (control->compression_level + 19));
130                                 break; // 8MB to 32MB
131                         case 7: control->dictSize = (1 << 25);
132                                 break; // 32MB
133                         case 8: control->dictSize = (1 << 26);
134                                 break; // 64MB
135                         case 9: control->dictSize = (1 << 27);
136                                 break; // 128MB -- this is maximum for 32 bits
137                         default: control->dictSize = (1 << 24);
138                                 break; // 16MB -- should never reach here
139                         }
140                 }
141                 /* LZMA spec shows memory requirements as 6MB, not 4MB and state size
142                  * where default is 16KB */
143                 control->overhead = ((i64)control->dictSize * 23 / 2) + (6 * ONE_MB) + 16384;
144         } else if (ZPAQ_COMPRESS) {
145                 control->zpaq_level = (int) round(((float) control->compression_level * 5 / 9));
146                 if (control->zpaq_bs == 0) {
147                         switch (control->compression_level) {
148                         case 1:
149                         case 2:
150                         case 3: control->zpaq_bs = 4;
151                                 break;  //16MB ZPAQ Default
152                         case 4:
153                         case 5: control->zpaq_bs = 5;
154                                 break;  //32MB
155                         case 6: control->zpaq_bs = 6;
156                                 break;  //64MB
157                         case 7: control->zpaq_bs = 7;
158                                 break;  //128MB
159                         case 8: control->zpaq_bs = 8;
160                                 break;  //256MB
161                         case 9: control->zpaq_bs = 9;
162                                 break;  //512MB
163                         default: control->zpaq_bs = 4;
164                                 break;  // should never reach here
165                         }
166                 }
167                 /* Overhead computation is 2^bs * 1MB + per thread overhead
168                  * taken from zpaq documentation. Amounts per thread are
169                  * approximate, but should reduce swap usage.
170                  * see http://mattmahoney.net/dc/zpaq.html
171                  */
172                 control->overhead = (i64) (1 << control->zpaq_bs) * ONE_MB +
173                         (control->zpaq_level == 1 ? 128 * ONE_MB :
174                          (control->zpaq_level == 2 ? 450 * ONE_MB :
175                           (control->zpaq_level == 3 ? 450 * ONE_MB :
176                            (control->zpaq_level == 4 ? 550 * ONE_MB :
177                             850 * ONE_MB))));
178         } else if(BZIP3_COMPRESS) {
179                 /* if block size property 0-8 not set...*/
180                 if (control->bzip3_block_size == 0)
181                         control->bzip3_bs = control->compression_level - 1;
182
183                 /* compute actual block size */
184                 control->bzip3_block_size = BZIP3_BLOCK_SIZE_FROM_PROP(control->bzip3_bs);
185                 control->overhead = (i64) control->bzip3_block_size * 6;
186         }
187
188         /* no need for zpaq computation here. do in open_stream_out() */
189 }