summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorluispedro <>2020-05-22 21:19:00 (GMT)
committerhdiff <hdiff@hdiff.luite.com>2020-05-22 21:19:00 (GMT)
commit0efdf0ff3140e8935c7a292604120a88ad21a1ce (patch)
tree65189a1f747548fdaa37c408e0159423722c2a2d
parentfe05eebb04516b3c923851595652b3a929852897 (diff)
version 0.1.2.0HEAD0.1.2.0master
-rw-r--r--CONTRIBUTING.md19
-rw-r--r--ChangeLog3
-rw-r--r--README.md17
-rw-r--r--zstd.cabal5
-rw-r--r--zstd/CONTRIBUTING.md352
-rw-r--r--zstd/README.md43
-rw-r--r--zstd/TESTING.md4
-rw-r--r--zstd/lib/README.md63
-rw-r--r--zstd/lib/common/bitstream.h77
-rw-r--r--zstd/lib/common/compiler.h45
-rw-r--r--zstd/lib/common/cpu.h2
-rw-r--r--zstd/lib/common/debug.c42
-rw-r--r--zstd/lib/common/debug.h42
-rw-r--r--zstd/lib/common/entropy_common.c46
-rw-r--r--zstd/lib/common/error_private.c3
-rw-r--r--zstd/lib/common/error_private.h8
-rw-r--r--zstd/lib/common/fse.h44
-rw-r--r--zstd/lib/common/fse_decompress.c47
-rw-r--r--zstd/lib/common/huf.h48
-rw-r--r--zstd/lib/common/mem.h77
-rw-r--r--zstd/lib/common/pool.c12
-rw-r--r--zstd/lib/common/pool.h4
-rw-r--r--zstd/lib/common/threading.c54
-rw-r--r--zstd/lib/common/threading.h40
-rw-r--r--zstd/lib/common/xxhash.c58
-rw-r--r--zstd/lib/common/xxhash.h42
-rw-r--r--zstd/lib/common/zstd_common.c2
-rw-r--r--zstd/lib/common/zstd_errors.h3
-rw-r--r--zstd/lib/common/zstd_internal.h180
-rw-r--r--zstd/lib/compress/fse_compress.c57
-rw-r--r--zstd/lib/compress/hist.c50
-rw-r--r--zstd/lib/compress/hist.h44
-rw-r--r--zstd/lib/compress/huf_compress.c184
-rw-r--r--zstd/lib/compress/zstd_compress.c2521
-rw-r--r--zstd/lib/compress/zstd_compress_internal.h372
-rw-r--r--zstd/lib/compress/zstd_compress_literals.c158
-rw-r--r--zstd/lib/compress/zstd_compress_literals.h29
-rw-r--r--zstd/lib/compress/zstd_compress_sequences.c419
-rw-r--r--zstd/lib/compress/zstd_compress_sequences.h54
-rw-r--r--zstd/lib/compress/zstd_compress_superblock.c845
-rw-r--r--zstd/lib/compress/zstd_compress_superblock.h32
-rw-r--r--zstd/lib/compress/zstd_cwksp.h525
-rw-r--r--zstd/lib/compress/zstd_double_fast.c108
-rw-r--r--zstd/lib/compress/zstd_double_fast.h4
-rw-r--r--zstd/lib/compress/zstd_fast.c158
-rw-r--r--zstd/lib/compress/zstd_fast.h4
-rw-r--r--zstd/lib/compress/zstd_lazy.c116
-rw-r--r--zstd/lib/compress/zstd_lazy.h2
-rw-r--r--zstd/lib/compress/zstd_ldm.c42
-rw-r--r--zstd/lib/compress/zstd_ldm.h9
-rw-r--r--zstd/lib/compress/zstd_opt.c195
-rw-r--r--zstd/lib/compress/zstd_opt.h2
-rw-r--r--zstd/lib/compress/zstdmt_compress.c146
-rw-r--r--zstd/lib/compress/zstdmt_compress.h5
-rw-r--r--zstd/lib/decompress/huf_decompress.c136
-rw-r--r--zstd/lib/decompress/zstd_ddict.c20
-rw-r--r--zstd/lib/decompress/zstd_ddict.h4
-rw-r--r--zstd/lib/decompress/zstd_decompress.c447
-rw-r--r--zstd/lib/decompress/zstd_decompress_block.c797
-rw-r--r--zstd/lib/decompress/zstd_decompress_block.h6
-rw-r--r--zstd/lib/decompress/zstd_decompress_internal.h22
-rw-r--r--zstd/lib/dictBuilder/cover.c218
-rw-r--r--zstd/lib/dictBuilder/cover.h59
-rw-r--r--zstd/lib/dictBuilder/fastcover.c103
-rw-r--r--zstd/lib/dictBuilder/zdict.c62
-rw-r--r--zstd/lib/dictBuilder/zdict.h84
-rw-r--r--zstd/lib/zstd.h353
67 files changed, 6629 insertions, 3145 deletions
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index a0d99fa..5611654 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -11,7 +11,7 @@ functionality.
If you run into problems with either the code, documentation, or
examples, please
-[file a GitHub issue](https://github.com/facebookexperimental/hs-zstd/issues).
+[file a GitHub issue](https://github.com/luispedro/hs-zstd/issues).
(Note: if you'd like to file an issue requesting new functionality,
rather than bugfixes, please plan to implement that functionality
@@ -29,21 +29,11 @@ request that you follow the conventions below.
3. If you're changing APIs, please update the appropriate
documentation and examples, and the [changelog](changelog.md).
(Breaking API changes have a high bar for approval.)
-4. Ensure that the test suite passes (`cabal test`).
+4. Ensure that the test suite passes (`stack build --test`).
5. Make sure your code conforms to the existing style you see in the
code base.
6. Tidy up your commit history before submitting a pull request. This
means one commit per logical change, please.
-7. If you haven't yet done so, please complete our Contributor License
- Agreement ("CLA").
-
-## Contributor License Agreement ("CLA")
-
-In order to accept your pull request, we need you to submit a CLA. You
-only need to do this once, after which you can work on any of
-Facebook's open source projects.
-
-Complete your CLA here: <https://code.facebook.com/cla>
## Issues
@@ -51,11 +41,6 @@ We use GitHub issues to track public bugs. Please ensure that your
description is clear and has sufficient instructions for us be able to
reproduce the issue.
-Facebook has a [bounty program](https://www.facebook.com/whitehat/)
-for the safe disclosure of security bugs. In those cases, please go
-through the process outlined on that page and do not file a public
-issue.
-
## License
By contributing to `hs-zstd`, you agree that your contributions will
diff --git a/ChangeLog b/ChangeLog
index e41ae88..58f7881 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,6 @@
+Version 0.1.2.0 2020-05-21 by luispedro
+ * Update to zstd 1.4.5 (PR #2 by Vanessa McHale)
+
Version 0.1.1.2 2019-05-20 by luispedro
* Re-release v0.1.1.1 with updated info
diff --git a/README.md b/README.md
index 78e0f0f..24a0381 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,14 @@
# Zstandard bindings for Haskell
-[![Travis](https://api.travis-ci.org/luispedro/hs-zstd.png)](https://travis-ci.org/luispedro/hs-zstd)
+[![Travis](https://api.travis-ci.com/luispedro/hs-zstd.png)](https://travis-ci.com/luispedro/hs-zstd)
This library provides Haskell bindings to the
[Zstandard compression library](http://facebook.github.io/zstd/).
+Note that is now the official repositoryc for the
+[zstd](https://hackage.haskell.org/package/zstd) Haskell package. The original
+authors (Facebook) are no longer interested in maintaining it.
+
The library is structured to provide several layers of abstraction.
* For the simplest use cases, the top-level
@@ -14,12 +18,13 @@ The library is structured to provide several layers of abstraction.
* If you need to stream a large amount of data with a constant memory
footprint, use the
[`Zstd.Streaming`](http://hackage.haskell.org/package/zstd/docs/Codec-Compression-Zstd-Streaming.html)
- module. This can also be used as a building block for adapting to
- streaming libraries such as `pipes` and `conduit`. (If you need to
- use lazy bytestrings instead, see the
+ module. See also the
+ [`conduit-zstd`](https://hackage.haskell.org/package/conduit-zstd) package
+ which provides a very thin wrapper to integrate with the `conduit` library.
+ If you need to use lazy bytestrings instead, see the
[`Zstd.Lazy`](http://hackage.haskell.org/package/zstd/docs/Codec-Compression-Zstd-Lazy.html)
- module. This is built using the abstractions from the
- `Zstd.Streaming` module.)
+ module. This is built using the abstractions from the `Zstd.Streaming`
+ module.
* When your usage is dominated by lots of small messages (presumably
using pre-computed compression dictionaries), use the
diff --git a/zstd.cabal b/zstd.cabal
index 3c66e67..b59f030 100644
--- a/zstd.cabal
+++ b/zstd.cabal
@@ -1,5 +1,5 @@
name: zstd
-version: 0.1.1.2
+version: 0.1.2.0
synopsis: Haskell bindings to the Zstandard compression algorithm
description:
A fast lossless compression algorithm, targeting real-time
@@ -63,6 +63,9 @@ library
zstd/lib/compress/fse_compress.c
zstd/lib/compress/hist.c
zstd/lib/compress/huf_compress.c
+ zstd/lib/compress/zstd_compress_literals.c
+ zstd/lib/compress/zstd_compress_sequences.c
+ zstd/lib/compress/zstd_compress_superblock.c
zstd/lib/compress/zstd_compress.c
zstd/lib/compress/zstd_double_fast.c
zstd/lib/compress/zstd_fast.c
diff --git a/zstd/CONTRIBUTING.md b/zstd/CONTRIBUTING.md
index dd013f8..637e371 100644
--- a/zstd/CONTRIBUTING.md
+++ b/zstd/CONTRIBUTING.md
@@ -26,6 +26,356 @@ to do this once to work on any of Facebook's open source projects.
Complete your CLA here: <https://code.facebook.com/cla>
+## Workflow
+Zstd uses a branch-based workflow for making changes to the codebase. Typically, zstd
+will use a new branch per sizable topic. For smaller changes, it is okay to lump multiple
+related changes into a branch.
+
+Our contribution process works in three main stages:
+1. Local development
+ * Update:
+ * Checkout your fork of zstd if you have not already
+ ```
+ git checkout https://github.com/<username>/zstd
+ cd zstd
+ ```
+ * Update your local dev branch
+ ```
+ git pull https://github.com/facebook/zstd dev
+ git push origin dev
+ ```
+ * Topic and development:
+ * Make a new branch on your fork about the topic you're developing for
+ ```
+ # branch names should be consise but sufficiently informative
+ git checkout -b <branch-name>
+ git push origin <branch-name>
+ ```
+ * Make commits and push
+ ```
+ # make some changes =
+ git add -u && git commit -m <message>
+ git push origin <branch-name>
+ ```
+ * Note: run local tests to ensure that your changes didn't break existing functionality
+ * Quick check
+ ```
+ make shortest
+ ```
+ * Longer check
+ ```
+ make test
+ ```
+2. Code Review and CI tests
+ * Ensure CI tests pass:
+ * Before sharing anything to the community, make sure that all CI tests pass on your local fork.
+ See our section on setting up your CI environment for more information on how to do this.
+ * Ensure that static analysis passes on your development machine. See the Static Analysis section
+ below to see how to do this.
+ * Create a pull request:
+ * When you are ready to share you changes to the community, create a pull request from your branch
+ to facebook:dev. You can do this very easily by clicking 'Create Pull Request' on your fork's home
+ page.
+ * From there, select the branch where you made changes as your source branch and facebook:dev
+ as the destination.
+ * Examine the diff presented between the two branches to make sure there is nothing unexpected.
+ * Write a good pull request description:
+ * While there is no strict template that our contributors follow, we would like them to
+ sufficiently summarize and motivate the changes they are proposing. We recommend all pull requests,
+ at least indirectly, address the following points.
+ * Is this pull request important and why?
+ * Is it addressing an issue? If so, what issue? (provide links for convenience please)
+ * Is this a new feature? If so, why is it useful and/or necessary?
+ * Are there background references and documents that reviewers should be aware of to properly assess this change?
+ * Note: make sure to point out any design and architectural decisions that you made and the rationale behind them.
+ * Note: if you have been working with a specific user and would like them to review your work, make sure you mention them using (@<username>)
+ * Submit the pull request and iterate with feedback.
+3. Merge and Release
+ * Getting approval:
+ * You will have to iterate on your changes with feedback from other collaborators to reach a point
+ where your pull request can be safely merged.
+ * To avoid too many comments on style and convention, make sure that you have a
+ look at our style section below before creating a pull request.
+ * Eventually, someone from the zstd team will approve your pull request and not long after merge it into
+ the dev branch.
+ * Housekeeping:
+ * Most PRs are linked with one or more Github issues. If this is the case for your PR, make sure
+ the corresponding issue is mentioned. If your change 'fixes' or completely addresses the
+ issue at hand, then please indicate this by requesting that an issue be closed by commenting.
+ * Just because your changes have been merged does not mean the topic or larger issue is complete. Remember
+ that the change must make it to an official zstd release for it to be meaningful. We recommend
+ that contributers track the activity on their pull request and corresponding issue(s) page(s) until
+ their change makes it to the next release of zstd. Users will often discover bugs in your code or
+ suggest ways to refine and improve your initial changes even after the pull request is merged.
+
+## Static Analysis
+Static analysis is a process for examining the correctness or validity of a program without actually
+executing it. It usually helps us find many simple bugs. Zstd uses clang's `scan-build` tool for
+static analysis. You can install it by following the instructions for your OS on https://clang-analyzer.llvm.org/scan-build.
+
+Once installed, you can ensure that our static analysis tests pass on your local development machine
+by running:
+```
+make staticAnalyze
+```
+
+In general, you can use `scan-build` to static analyze any build script. For example, to static analyze
+just `contrib/largeNbDicts` and nothing else, you can run:
+
+```
+scan-build make -C contrib/largeNbDicts largeNbDicts
+```
+
+## Performance
+Performance is extremely important for zstd and we only merge pull requests whose performance
+landscape and corresponding trade-offs have been adequately analyzed, reproduced, and presented.
+This high bar for performance means that every PR which has the potential to
+impact performance takes a very long time for us to properly review. That being said, we
+always welcome contributions to improve performance (or worsen performance for the trade-off of
+something else). Please keep the following in mind before submitting a performance related PR:
+
+1. Zstd isn't as old as gzip but it has been around for time now and its evolution is
+very well documented via past Github issues and pull requests. It may be the case that your
+particular performance optimization has already been considered in the past. Please take some
+time to search through old issues and pull requests using keywords specific to your
+would-be PR. Of course, just because a topic has already been discussed (and perhaps rejected
+on some grounds) in the past, doesn't mean it isn't worth bringing up again. But even in that case,
+it will be helpful for you to have context from that topic's history before contributing.
+2. The distinction between noise and actual performance gains can unfortunately be very subtle
+especially when microbenchmarking extremely small wins or losses. The only remedy to getting
+something subtle merged is extensive benchmarking. You will be doing us a great favor if you
+take the time to run extensive, long-duration, and potentially cross-(os, platform, process, etc)
+benchmarks on your end before submitting a PR. Of course, you will not be able to benchmark
+your changes on every single processor and os out there (and neither will we) but do that best
+you can:) We've adding some things to think about when benchmarking below in the Benchmarking
+Performance section which might be helpful for you.
+3. Optimizing performance for a certain OS, processor vendor, compiler, or network system is a perfectly
+legitimate thing to do as long as it does not harm the overall performance health of Zstd.
+This is a hard balance to strike but please keep in mind other aspects of Zstd when
+submitting changes that are clang-specific, windows-specific, etc.
+
+## Benchmarking Performance
+Performance microbenchmarking is a tricky subject but also essential for Zstd. We value empirical
+testing over theoretical speculation. This guide it not perfect but for most scenarios, it
+is a good place to start.
+
+### Stability
+Unfortunately, the most important aspect in being able to benchmark reliably is to have a stable
+benchmarking machine. A virtual machine, a machine with shared resources, or your laptop
+will typically not be stable enough to obtain reliable benchmark results. If you can get your
+hands on a desktop, this is usually a better scenario.
+
+Of course, benchmarking can be done on non-hyper-stable machines as well. You will just have to
+do a little more work to ensure that you are in fact measuring the changes you've made not and
+noise. Here are some things you can do to make your benchmarks more stable:
+
+1. The most simple thing you can do to drastically improve the stability of your benchmark is
+to run it multiple times and then aggregate the results of those runs. As a general rule of
+thumb, the smaller the change you are trying to measure, the more samples of benchmark runs
+you will have to aggregate over to get reliable results. Here are some additional things to keep in
+mind when running multiple trials:
+ * How you aggregate your samples are important. You might be tempted to use the mean of your
+ results. While this is certainly going to be a more stable number than a raw single sample
+ benchmark number, you might have more luck by taking the median. The mean is not robust to
+ outliers whereas the median is. Better still, you could simply take the fastest speed your
+ benchmark achieved on each run since that is likely the fastest your process will be
+ capable of running your code. In our experience, this (aggregating by just taking the sample
+ with the fastest running time) has been the most stable approach.
+ * The more samples you have, the more stable your benchmarks should be. You can verify
+ your improved stability by looking at the size of your confidence intervals as you
+ increase your sample count. These should get smaller and smaller. Eventually hopefully
+ smaller than the performance win you are expecting.
+ * Most processors will take some time to get `hot` when running anything. The observations
+ you collect during that time period will very different from the true performance number. Having
+ a very large number of sample will help alleviate this problem slightly but you can also
+ address is directly by simply not including the first `n` iterations of your benchmark in
+ your aggregations. You can determine `n` by simply looking at the results from each iteration
+ and then hand picking a good threshold after which the variance in results seems to stabilize.
+2. You cannot really get reliable benchmarks if your host machine is simultaneously running
+another cpu/memory-intensive application in the background. If you are running benchmarks on your
+personal laptop for instance, you should close all applications (including your code editor and
+browser) before running your benchmarks. You might also have invisible background applications
+running. You can see what these are by looking at either Activity Monitor on Mac or Task Manager
+on Windows. You will get more stable benchmark results of you end those processes as well.
+ * If you have multiple cores, you can even run your benchmark on a reserved core to prevent
+ pollution from other OS and user processes. There are a number of ways to do this depending
+ on your OS:
+ * On linux boxes, you have use https://github.com/lpechacek/cpuset.
+ * On Windows, you can "Set Processor Affinity" using https://www.thewindowsclub.com/processor-affinity-windows
+ * On Mac, you can try to use their dedicated affinity API https://developer.apple.com/library/archive/releasenotes/Performance/RN-AffinityAPI/#//apple_ref/doc/uid/TP40006635-CH1-DontLinkElementID_2
+3. To benchmark, you will likely end up writing a separate c/c++ program that will link libzstd.
+Dynamically linking your library will introduce some added variation (not a large amount but
+definitely some). Statically linking libzstd will be more stable. Static libraries should
+be enabled by default when building zstd.
+4. Use a profiler with a good high resolution timer. See the section below on profiling for
+details on this.
+5. Disable frequency scaling, turbo boost and address space randomization (this will vary by OS)
+6. Try to avoid storage. On some systems you can use tmpfs. Putting the program, inputs and outputs on
+tmpfs avoids touching a real storage system, which can have a pretty big variability.
+
+Also check our LLVM's guide on benchmarking here: https://llvm.org/docs/Benchmarking.html
+
+### Zstd benchmark
+The fastest signal you can get regarding your performance changes is via the in-build zstd cli
+bench option. You can run Zstd as you typically would for your scenario using some set of options
+and then additionally also specify the `-b#` option. Doing this will run our benchmarking pipeline
+for that options you have just provided. If you want to look at the internals of how this
+benchmarking script works, you can check out programs/benchzstd.c
+
+For example: say you have made a change that you believe improves the speed of zstd level 1. The
+very first thing you should use to asses whether you actually achieved any sort of improvement
+is `zstd -b`. You might try to do something like this. Note: you can use the `-i` option to
+specify a running time for your benchmark in seconds (default is 3 seconds).
+Usually, the longer the running time, the more stable your results will be.
+
+```
+$ git checkout <commit-before-your-change>
+$ make && cp zstd zstd-old
+$ git checkout <commit-after-your-change>
+$ make && cp zstd zstd-new
+$ zstd-old -i5 -b1 <your-test-data>
+ 1<your-test-data> : 8990 -> 3992 (2.252), 302.6 MB/s , 626.4 MB/s
+$ zstd-new -i5 -b1 <your-test-data>
+ 1<your-test-data> : 8990 -> 3992 (2.252), 302.8 MB/s , 628.4 MB/s
+```
+
+Unless your performance win is large enough to be visible despite the intrinsic noise
+on your computer, benchzstd alone will likely not be enough to validate the impact of your
+changes. For example, the results of the example above indicate that effectively nothing
+changed but there could be a small <3% improvement that the noise on the host machine
+obscured. So unless you see a large performance win (10-15% consistently) using just
+this method of evaluation will not be sufficient.
+
+### Profiling
+There are a number of great profilers out there. We're going to briefly mention how you can
+profile your code using `instruments` on mac, `perf` on linux and `visual studio profiler`
+on windows.
+
+Say you have an idea for a change that you think will provide some good performance gains
+for level 1 compression on Zstd. Typically this means, you have identified a section of
+code that you think can be made to run faster.
+
+The first thing you will want to do is make sure that the piece of code is actually taking up
+a notable amount of time to run. It is usually not worth optimzing something which accounts for less than
+0.0001% of the total running time. Luckily, there are tools to help with this.
+Profilers will let you see how much time your code spends inside a particular function.
+If your target code snippit is only part of a function, it might be worth trying to
+isolate that snippit by moving it to its own function (this is usually not necessary but
+might be).
+
+Most profilers (including the profilers dicusssed below) will generate a call graph of
+functions for you. Your goal will be to find your function of interest in this call grapch
+and then inspect the time spent inside of it. You might also want to to look at the
+annotated assembly which most profilers will provide you with.
+
+#### Instruments
+We will once again consider the scenario where you think you've identified a piece of code
+whose performance can be improved upon. Follow these steps to profile your code using
+Instruments.
+
+1. Open Instruments
+2. Select `Time Profiler` from the list of standard templates
+3. Close all other applications except for your instruments window and your terminal
+4. Run your benchmarking script from your terminal window
+ * You will want a benchmark that runs for at least a few seconds (5 seconds will
+ usually be long enough). This way the profiler will have something to work with
+ and you will have ample time to attach your profiler to this process:)
+ * I will just use benchzstd as my bencharmking script for this example:
+```
+$ zstd -b1 -i5 <my-data> # this will run for 5 seconds
+```
+5. Once you run your benchmarking script, switch back over to instruments and attach your
+process to the time profiler. You can do this by:
+ * Clicking on the `All Processes` drop down in the top left of the toolbar.
+ * Selecting your process from the dropdown. In my case, it is just going to be labled
+ `zstd`
+ * Hitting the bright red record circle button on the top left of the toolbar
+6. You profiler will now start collecting metrics from your bencharking script. Once
+you think you have collected enough samples (usually this is the case after 3 seconds of
+recording), stop your profiler.
+7. Make sure that in toolbar of the bottom window, `profile` is selected.
+8. You should be able to see your call graph.
+ * If you don't see the call graph or an incomplete call graph, make sure you have compiled
+ zstd and your benchmarking scripg using debug flags. On mac and linux, this just means
+ you will have to supply the `-g` flag alone with your build script. You might also
+ have to provide the `-fno-omit-frame-pointer` flag
+9. Dig down the graph to find your function call and then inspect it by double clicking
+the list item. You will be able to see the annotated source code and the assembly side by
+side.
+
+#### Perf
+
+This wiki has a pretty detailed tutorial on getting started working with perf so we'll
+leave you to check that out of you're getting started:
+
+https://perf.wiki.kernel.org/index.php/Tutorial
+
+Some general notes on perf:
+* Use `perf stat -r # <bench-program>` to quickly get some relevant timing and
+counter statistics. Perf uses a high resolution timer and this is likely one
+of the first things your team will run when assessing your PR.
+* Perf has a long list of hardware counters that can be viewed with `perf --list`.
+When measuring optimizations, something worth trying is to make sure the handware
+counters you expect to be impacted by your change are in fact being so. For example,
+if you expect the L1 cache misses to decrease with your change, you can look at the
+counter `L1-dcache-load-misses`
+* Perf hardware counters will not work on a virtual machine.
+
+#### Visual Studio
+
+TODO
+
+
+## Setting up continuous integration (CI) on your fork
+Zstd uses a number of different continuous integration (CI) tools to ensure that new changes
+are well tested before they make it to an official release. Specifically, we use the platforms
+travis-ci, circle-ci, and appveyor.
+
+Changes cannot be merged into the main dev branch unless they pass all of our CI tests.
+The easiest way to run these CI tests on your own before submitting a PR to our dev branch
+is to configure your personal fork of zstd with each of the CI platforms. Below, you'll find
+instructions for doing this.
+
+### travis-ci
+Follow these steps to link travis-ci with your github fork of zstd
+
+1. Make sure you are logged into your github account
+2. Go to https://travis-ci.org/
+3. Click 'Sign in with Github' on the top right
+4. Click 'Authorize travis-ci'
+5. Click 'Activate all repositories using Github Apps'
+6. Select 'Only select repositories' and select your fork of zstd from the drop down
+7. Click 'Approve and Install'
+8. Click 'Sign in with Github' again. This time, it will be for travis-pro (which will let you view your tests on the web dashboard)
+9. Click 'Authorize travis-pro'
+10. You should have travis set up on your fork now.
+
+### circle-ci
+TODO
+
+### appveyor
+Follow these steps to link circle-ci with your girhub fork of zstd
+
+1. Make sure you are logged into your github account
+2. Go to https://www.appveyor.com/
+3. Click 'Sign in' on the top right
+4. Select 'Github' on the left panel
+5. Click 'Authorize appveyor'
+6. You might be asked to select which repositories you want to give appveyor permission to. Select your fork of zstd if you're prompted
+7. You should have appveyor set up on your fork now.
+
+### General notes on CI
+CI tests run every time a pull request (PR) is created or updated. The exact tests
+that get run will depend on the destination branch you specify. Some tests take
+longer to run than others. Currently, our CI is set up to run a short
+series of tests when creating a PR to the dev branch and a longer series of tests
+when creating a PR to the master branch. You can look in the configuration files
+of the respective CI platform for more information on what gets run when.
+
+Most people will just want to create a PR with the destination set to their local dev
+branch of zstd. You can then find the status of the tests on the PR's page. You can also
+re-run tests and cancel running tests from the PR page or from the respective CI's dashboard.
+
## Issues
We use GitHub issues to track public bugs. Please ensure your description is
clear and has sufficient instructions to be able to reproduce the issue.
@@ -34,7 +384,7 @@ Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe
disclosure of security bugs. In those cases, please go through the process
outlined on that page and do not file a public issue.
-## Coding Style
+## Coding Style
* 4 spaces for indentation rather than tabs
## License
diff --git a/zstd/README.md b/zstd/README.md
index 290341c..5c300fd 100644
--- a/zstd/README.md
+++ b/zstd/README.md
@@ -15,6 +15,7 @@ a list of known ports and bindings is provided on [Zstandard homepage](http://ww
[![Build status][AppveyorDevBadge]][AppveyorLink]
[![Build status][CircleDevBadge]][CircleLink]
[![Build status][CirrusDevBadge]][CirrusLink]
+[![Fuzzing Status][OSSFuzzBadge]][OSSFuzzLink]
[travisDevBadge]: https://travis-ci.org/facebook/zstd.svg?branch=dev "Continuous Integration test suite"
[travisLink]: https://travis-ci.org/facebook/zstd
@@ -24,14 +25,16 @@ a list of known ports and bindings is provided on [Zstandard homepage](http://ww
[CircleLink]: https://circleci.com/gh/facebook/zstd
[CirrusDevBadge]: https://api.cirrus-ci.com/github/facebook/zstd.svg?branch=dev
[CirrusLink]: https://cirrus-ci.com/github/facebook/zstd
+[OSSFuzzBadge]: https://oss-fuzz-build-logs.storage.googleapis.com/badges/zstd.svg
+[OSSFuzzLink]: https://bugs.chromium.org/p/oss-fuzz/issues/list?sort=-opened&can=1&q=proj:zstd
## Benchmarks
For reference, several fast compression algorithms were tested and compared
-on a server running Arch Linux (`Linux version 5.0.5-arch1-1`),
+on a server running Arch Linux (`Linux version 5.5.11-arch1-1`),
with a Core i9-9900K CPU @ 5.0GHz,
using [lzbench], an open-source in-memory benchmark by @inikep
-compiled with [gcc] 8.2.1,
+compiled with [gcc] 9.3.0,
on the [Silesia compression corpus].
[lzbench]: https://github.com/inikep/lzbench
@@ -40,18 +43,26 @@ on the [Silesia compression corpus].
| Compressor name | Ratio | Compression| Decompress.|
| --------------- | ------| -----------| ---------- |
-| **zstd 1.4.0 -1** | 2.884 | 530 MB/s | 1360 MB/s |
-| zlib 1.2.11 -1 | 2.743 | 110 MB/s | 440 MB/s |
-| brotli 1.0.7 -0 | 2.701 | 430 MB/s | 470 MB/s |
-| quicklz 1.5.0 -1 | 2.238 | 600 MB/s | 800 MB/s |
-| lzo1x 2.09 -1 | 2.106 | 680 MB/s | 950 MB/s |
-| lz4 1.8.3 | 2.101 | 800 MB/s | 4220 MB/s |
-| snappy 1.1.4 | 2.073 | 580 MB/s | 2020 MB/s |
-| lzf 3.6 -1 | 2.077 | 440 MB/s | 930 MB/s |
+| **zstd 1.4.5 -1** | 2.884 | 500 MB/s | 1660 MB/s |
+| zlib 1.2.11 -1 | 2.743 | 90 MB/s | 400 MB/s |
+| brotli 1.0.7 -0 | 2.703 | 400 MB/s | 450 MB/s |
+| **zstd 1.4.5 --fast=1** | 2.434 | 570 MB/s | 2200 MB/s |
+| **zstd 1.4.5 --fast=3** | 2.312 | 640 MB/s | 2300 MB/s |
+| quicklz 1.5.0 -1 | 2.238 | 560 MB/s | 710 MB/s |
+| **zstd 1.4.5 --fast=5** | 2.178 | 700 MB/s | 2420 MB/s |
+| lzo1x 2.10 -1 | 2.106 | 690 MB/s | 820 MB/s |
+| lz4 1.9.2 | 2.101 | 740 MB/s | 4530 MB/s |
+| **zstd 1.4.5 --fast=7** | 2.096 | 750 MB/s | 2480 MB/s |
+| lzf 3.6 -1 | 2.077 | 410 MB/s | 860 MB/s |
+| snappy 1.1.8 | 2.073 | 560 MB/s | 1790 MB/s |
[zlib]: http://www.zlib.net/
[LZ4]: http://www.lz4.org/
+The negative compression levels, specified with `--fast=#`,
+offer faster compression and decompression speed in exchange for some loss in
+compression ratio compared to level 1, as seen in the table above.
+
Zstd can also offer stronger compression ratios at the cost of compression speed.
Speed vs Compression trade-off is configurable by small increments.
Decompression speed is preserved and remains roughly the same at all settings,
@@ -140,6 +151,18 @@ example about how Meson is used to build this project.
Note that default build type is **release**.
+### VCPKG
+You can build and install zstd [vcpkg](https://github.com/Microsoft/vcpkg/) dependency manager:
+
+ git clone https://github.com/Microsoft/vcpkg.git
+ cd vcpkg
+ ./bootstrap-vcpkg.sh
+ ./vcpkg integrate install
+ ./vcpkg install zstd
+
+The zstd port in vcpkg is kept up to date by Microsoft team members and community contributors.
+If the version is out of date, please [create an issue or pull request](https://github.com/Microsoft/vcpkg) on the vcpkg repository.
+
### Visual Studio (Windows)
Going into `build` directory, you will find additional possibilities:
diff --git a/zstd/TESTING.md b/zstd/TESTING.md
index 551981b..7e53051 100644
--- a/zstd/TESTING.md
+++ b/zstd/TESTING.md
@@ -11,7 +11,7 @@ They consist of the following tests:
- Compilation on all supported targets (x86, x86_64, ARM, AArch64, PowerPC, and PowerPC64)
- Compilation on various versions of gcc, clang, and g++
- `tests/playTests.sh` on x86_64, without the tests on long data (CLI tests)
-- Small tests (`tests/legacy.c`, `tests/longmatch.c`, `tests/symbols.c`) on x64_64
+- Small tests (`tests/legacy.c`, `tests/longmatch.c`) on x64_64
Medium Tests
------------
@@ -19,7 +19,7 @@ Medium tests run on every commit and pull request to `dev` branch, on TravisCI.
They consist of the following tests:
- The following tests run with UBsan and Asan on x86_64 and x86, as well as with
Msan on x86_64
- - `tests/playTests.sh --test-long-data`
+ - `tests/playTests.sh --test-large-data`
- Fuzzer tests: `tests/fuzzer.c`, `tests/zstreamtest.c`, and `tests/decodecorpus.c`
- `tests/zstreamtest.c` under Tsan (streaming mode, including multithreaded mode)
- Valgrind Test (`make -C tests valgrindTest`) (testing CLI and fuzzer under valgrind)
diff --git a/zstd/lib/README.md b/zstd/lib/README.md
index 792729b..6ccffb1 100644
--- a/zstd/lib/README.md
+++ b/zstd/lib/README.md
@@ -27,10 +27,10 @@ Enabling multithreading requires 2 conditions :
Both conditions are automatically applied when invoking `make lib-mt` target.
When linking a POSIX program with a multithreaded version of `libzstd`,
-note that it's necessary to request the `-pthread` flag during link stage.
+note that it's necessary to invoke the `-pthread` flag during link stage.
Multithreading capabilities are exposed
-via the [advanced API defined in `lib/zstd.h`](https://github.com/facebook/zstd/blob/v1.3.8/lib/zstd.h#L592).
+via the [advanced API defined in `lib/zstd.h`](https://github.com/facebook/zstd/blob/v1.4.3/lib/zstd.h#L351).
#### API
@@ -85,33 +85,64 @@ The file structure is designed to make this selection manually achievable for an
- While invoking `make libzstd`, it's possible to define build macros
`ZSTD_LIB_COMPRESSION, ZSTD_LIB_DECOMPRESSION`, `ZSTD_LIB_DICTBUILDER`,
- and `ZSTD_LIB_DEPRECATED` as `0` to forgo compilation of the corresponding features.
- This will also disable compilation of all dependencies
- (eg. `ZSTD_LIB_COMPRESSION=0` will also disable dictBuilder).
-
-- There are some additional build macros that can be used to minify the decoder.
-
- Zstandard often has more than one implementation of a piece of functionality,
- where each implementation optimizes for different scenarios. For example, the
- Huffman decoder has complementary implementations that decode the stream one
- symbol at a time or two symbols at a time. Zstd normally includes both (and
- dispatches between them at runtime), but by defining `HUF_FORCE_DECOMPRESS_X1`
- or `HUF_FORCE_DECOMPRESS_X2`, you can force the use of one or the other, avoiding
+ and `ZSTD_LIB_DEPRECATED` as `0` to forgo compilation of the
+ corresponding features. This will also disable compilation of all
+ dependencies (eg. `ZSTD_LIB_COMPRESSION=0` will also disable
+ dictBuilder).
+
+- There are a number of options that can help minimize the binary size of
+ `libzstd`.
+
+ The first step is to select the components needed (using the above-described
+ `ZSTD_LIB_COMPRESSION` etc.).
+
+ The next step is to set `ZSTD_LIB_MINIFY` to `1` when invoking `make`. This
+ disables various optional components and changes the compilation flags to
+ prioritize space-saving.
+
+ Detailed options: Zstandard's code and build environment is set up by default
+ to optimize above all else for performance. In pursuit of this goal, Zstandard
+ makes significant trade-offs in code size. For example, Zstandard often has
+ more than one implementation of a particular component, with each
+ implementation optimized for different scenarios. For example, the Huffman
+ decoder has complementary implementations that decode the stream one symbol at
+ a time or two symbols at a time. Zstd normally includes both (and dispatches
+ between them at runtime), but by defining `HUF_FORCE_DECOMPRESS_X1` or
+ `HUF_FORCE_DECOMPRESS_X2`, you can force the use of one or the other, avoiding
compilation of the other. Similarly, `ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT`
and `ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG` force the compilation and use of
only one or the other of two decompression implementations. The smallest
binary is achieved by using `HUF_FORCE_DECOMPRESS_X1` and
- `ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT`.
+ `ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT` (implied by `ZSTD_LIB_MINIFY`).
For squeezing the last ounce of size out, you can also define
`ZSTD_NO_INLINE`, which disables inlining, and `ZSTD_STRIP_ERROR_STRINGS`,
which removes the error messages that are otherwise returned by
- `ZSTD_getErrorName`.
+ `ZSTD_getErrorName` (implied by `ZSTD_LIB_MINIFY`).
+
+ Finally, when integrating into your application, make sure you're doing link-
+ time optimation and unused symbol garbage collection (via some combination of,
+ e.g., `-flto`, `-ffat-lto-objects`, `-fuse-linker-plugin`,
+ `-ffunction-sections`, `-fdata-sections`, `-fmerge-all-constants`,
+ `-Wl,--gc-sections`, `-Wl,-z,norelro`, and an archiver that understands
+ the compiler's intermediate representation, e.g., `AR=gcc-ar`). Consult your
+ compiler's documentation.
- While invoking `make libzstd`, the build macro `ZSTD_LEGACY_MULTITHREADED_API=1`
will expose the deprecated `ZSTDMT` API exposed by `zstdmt_compress.h` in
the shared library, which is now hidden by default.
+- The build macro `DYNAMIC_BMI2` can be set to 1 or 0 in order to generate binaries
+ which can detect at runtime the presence of BMI2 instructions, and use them only if present.
+ These instructions contribute to better performance, notably on the decoder side.
+ By default, this feature is automatically enabled on detecting
+ the right instruction set (x64) and compiler (clang or gcc >= 5).
+ It's obviously disabled for different cpus,
+ or when BMI2 instruction set is _required_ by the compiler command line
+ (in this case, only the BMI2 code path is generated).
+ Setting this macro will either force to generate the BMI2 dispatcher (1)
+ or prevent it (0). It overrides automatic detection.
+
#### Windows : using MinGW+MSYS to create DLL
diff --git a/zstd/lib/common/bitstream.h b/zstd/lib/common/bitstream.h
index d955bd6..37b99c0 100644
--- a/zstd/lib/common/bitstream.h
+++ b/zstd/lib/common/bitstream.h
@@ -1,35 +1,15 @@
/* ******************************************************************
- bitstream
- Part of FSE library
- Copyright (C) 2013-present, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ * bitstream
+ * Part of FSE library
+ * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
****************************************************************** */
#ifndef BITSTREAM_H_MODULE
#define BITSTREAM_H_MODULE
@@ -48,6 +28,7 @@ extern "C" {
* Dependencies
******************************************/
#include "mem.h" /* unaligned access routines */
+#include "compiler.h" /* UNLIKELY() */
#include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */
#include "error_private.h" /* error codes and messages */
@@ -57,6 +38,8 @@ extern "C" {
=========================================*/
#if defined(__BMI__) && defined(__GNUC__)
# include <immintrin.h> /* support for bextr (experimental) */
+#elif defined(__ICCARM__)
+# include <intrinsics.h>
#endif
#define STREAM_ACCUMULATOR_MIN_32 25
@@ -159,10 +142,11 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val)
{
# if defined(_MSC_VER) /* Visual */
unsigned long r=0;
- _BitScanReverse ( &r, val );
- return (unsigned) r;
+ return _BitScanReverse ( &r, val ) ? (unsigned)r : 0;
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
- return 31 - __builtin_clz (val);
+ return __builtin_clz (val) ^ 31;
+# elif defined(__ICCARM__) /* IAR Intrinsic */
+ return 31 - __CLZ(val);
# else /* Software version */
static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29,
11, 14, 16, 18, 22, 25, 3, 30,
@@ -240,9 +224,9 @@ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
{
size_t const nbBytes = bitC->bitPos >> 3;
assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
+ assert(bitC->ptr <= bitC->endPtr);
MEM_writeLEST(bitC->ptr, bitC->bitContainer);
bitC->ptr += nbBytes;
- assert(bitC->ptr <= bitC->endPtr);
bitC->bitPos &= 7;
bitC->bitContainer >>= nbBytes*8;
}
@@ -256,6 +240,7 @@ MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
{
size_t const nbBytes = bitC->bitPos >> 3;
assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
+ assert(bitC->ptr <= bitC->endPtr);
MEM_writeLEST(bitC->ptr, bitC->bitContainer);
bitC->ptr += nbBytes;
if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
@@ -406,6 +391,23 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
return value;
}
+/*! BIT_reloadDStreamFast() :
+ * Similar to BIT_reloadDStream(), but with two differences:
+ * 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!
+ * 2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this
+ * point you must use BIT_reloadDStream() to reload.
+ */
+MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
+{
+ if (UNLIKELY(bitD->ptr < bitD->limitPtr))
+ return BIT_DStream_overflow;
+ assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
+ bitD->ptr -= bitD->bitsConsumed >> 3;
+ bitD->bitsConsumed &= 7;
+ bitD->bitContainer = MEM_readLEST(bitD->ptr);
+ return BIT_DStream_unfinished;
+}
+
/*! BIT_reloadDStream() :
* Refill `bitD` from buffer previously set in BIT_initDStream() .
* This function is safe, it guarantees it will not read beyond src buffer.
@@ -417,10 +419,7 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
return BIT_DStream_overflow;
if (bitD->ptr >= bitD->limitPtr) {
- bitD->ptr -= bitD->bitsConsumed >> 3;
- bitD->bitsConsumed &= 7;
- bitD->bitContainer = MEM_readLEST(bitD->ptr);
- return BIT_DStream_unfinished;
+ return BIT_reloadDStreamFast(bitD);
}
if (bitD->ptr == bitD->start) {
if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
diff --git a/zstd/lib/common/compiler.h b/zstd/lib/common/compiler.h
index 0836e3e..95e9483 100644
--- a/zstd/lib/common/compiler.h
+++ b/zstd/lib/common/compiler.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -17,13 +17,13 @@
/* force inlining */
#if !defined(ZSTD_NO_INLINE)
-#if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
+#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
# define INLINE_KEYWORD inline
#else
# define INLINE_KEYWORD
#endif
-#if defined(__GNUC__)
+#if defined(__GNUC__) || defined(__ICCARM__)
# define FORCE_INLINE_ATTR __attribute__((always_inline))
#elif defined(_MSC_VER)
# define FORCE_INLINE_ATTR __forceinline
@@ -61,11 +61,18 @@
# define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR
#endif
+/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
+#if defined(__GNUC__)
+# define UNUSED_ATTR __attribute__((unused))
+#else
+# define UNUSED_ATTR
+#endif
+
/* force no inlining */
#ifdef _MSC_VER
# define FORCE_NOINLINE static __declspec(noinline)
#else
-# ifdef __GNUC__
+# if defined(__GNUC__) || defined(__ICCARM__)
# define FORCE_NOINLINE static __attribute__((__noinline__))
# else
# define FORCE_NOINLINE static
@@ -76,7 +83,7 @@
#ifndef __has_attribute
#define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */
#endif
-#if defined(__GNUC__)
+#if defined(__GNUC__) || defined(__ICCARM__)
# define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
#else
# define TARGET_ATTRIBUTE(target)
@@ -107,6 +114,9 @@
# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
+# elif defined(__aarch64__)
+# define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr)))
+# define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr)))
# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
@@ -127,6 +137,31 @@
} \
}
+/* vectorization
+ * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */
+#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__)
+# if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5)
+# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
+# else
+# define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")")
+# endif
+#else
+# define DONT_VECTORIZE
+#endif
+
+/* Tell the compiler that a branch is likely or unlikely.
+ * Only use these macros if it causes the compiler to generate better code.
+ * If you can remove a LIKELY/UNLIKELY annotation without speed changes in gcc
+ * and clang, please do.
+ */
+#if defined(__GNUC__)
+#define LIKELY(x) (__builtin_expect((x), 1))
+#define UNLIKELY(x) (__builtin_expect((x), 0))
+#else
+#define LIKELY(x) (x)
+#define UNLIKELY(x) (x)
+#endif
+
/* disable warnings */
#ifdef _MSC_VER /* Visual Studio */
# include <intrin.h> /* For Visual 2005 */
diff --git a/zstd/lib/common/cpu.h b/zstd/lib/common/cpu.h
index 5f0923f..6e8a974 100644
--- a/zstd/lib/common/cpu.h
+++ b/zstd/lib/common/cpu.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-present, Facebook, Inc.
+ * Copyright (c) 2018-2020, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
diff --git a/zstd/lib/common/debug.c b/zstd/lib/common/debug.c
index 3ebdd1c..f303f4a 100644
--- a/zstd/lib/common/debug.c
+++ b/zstd/lib/common/debug.c
@@ -1,35 +1,15 @@
/* ******************************************************************
- debug
- Part of FSE library
- Copyright (C) 2013-present, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ * debug
+ * Part of FSE library
+ * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
****************************************************************** */
diff --git a/zstd/lib/common/debug.h b/zstd/lib/common/debug.h
index b4fc89d..ac62248 100644
--- a/zstd/lib/common/debug.h
+++ b/zstd/lib/common/debug.h
@@ -1,35 +1,15 @@
/* ******************************************************************
- debug
- Part of FSE library
- Copyright (C) 2013-present, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ * debug
+ * Part of FSE library
+ * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
****************************************************************** */
diff --git a/zstd/lib/common/entropy_common.c b/zstd/lib/common/entropy_common.c
index b12944e..9d3e4e8 100644
--- a/zstd/lib/common/entropy_common.c
+++ b/zstd/lib/common/entropy_common.c
@@ -1,36 +1,16 @@
-/*
- Common functions of New Generation Entropy library
- Copyright (C) 2016, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
-*************************************************************************** */
+/* ******************************************************************
+ * Common functions of New Generation Entropy library
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ * - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
/* *************************************
* Dependencies
diff --git a/zstd/lib/common/error_private.c b/zstd/lib/common/error_private.c
index 7c1bb67..cd43752 100644
--- a/zstd/lib/common/error_private.c
+++ b/zstd/lib/common/error_private.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -47,6 +47,7 @@ const char* ERR_getErrorString(ERR_enum code)
/* following error codes are not stable and may be removed or changed in a future version */
case PREFIX(frameIndex_tooLarge): return "Frame index is too large";
case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
+ case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong";
case PREFIX(maxCode):
default: return notErrorCode;
}
diff --git a/zstd/lib/common/error_private.h b/zstd/lib/common/error_private.h
index 0d2fa7e..982cf8e 100644
--- a/zstd/lib/common/error_private.h
+++ b/zstd/lib/common/error_private.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -49,7 +49,7 @@ typedef ZSTD_ErrorCode ERR_enum;
/*-****************************************
* Error codes handling
******************************************/
-#undef ERROR /* reported already defined on VS 2015 (Rich Geldreich) */
+#undef ERROR /* already defined on Visual Studio */
#define ERROR(name) ZSTD_ERROR(name)
#define ZSTD_ERROR(name) ((size_t)-PREFIX(name))
@@ -57,6 +57,10 @@ ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); }
+/* check and forward error code */
+#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
+#define CHECK_F(f) { CHECK_V_F(_var_err__, f); }
+
/*-****************************************
* Error Strings
diff --git a/zstd/lib/common/fse.h b/zstd/lib/common/fse.h
index 811c670..ff54e70 100644
--- a/zstd/lib/common/fse.h
+++ b/zstd/lib/common/fse.h
@@ -1,35 +1,15 @@
/* ******************************************************************
- FSE : Finite State Entropy codec
- Public Prototypes declaration
- Copyright (C) 2013-2016, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ * FSE : Finite State Entropy codec
+ * Public Prototypes declaration
+ * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
****************************************************************** */
#if defined (__cplusplus)
@@ -308,7 +288,7 @@ If there is an error, the function will return an error code, which can be teste
*******************************************/
/* FSE buffer bounds */
#define FSE_NCOUNTBOUND 512
-#define FSE_BLOCKBOUND(size) (size + (size>>7))
+#define FSE_BLOCKBOUND(size) (size + (size>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
diff --git a/zstd/lib/common/fse_decompress.c b/zstd/lib/common/fse_decompress.c
index 72bbead..bcc2223 100644
--- a/zstd/lib/common/fse_decompress.c
+++ b/zstd/lib/common/fse_decompress.c
@@ -1,35 +1,15 @@
/* ******************************************************************
- FSE : Finite State Entropy decoder
- Copyright (C) 2013-2015, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ * FSE : Finite State Entropy decoder
+ * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ * - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
****************************************************************** */
@@ -51,9 +31,6 @@
#define FSE_isError ERR_isError
#define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */
-/* check and forward error code */
-#define CHECK_F(f) { size_t const e = f; if (FSE_isError(e)) return e; }
-
/* **************************************************************
* Templates
@@ -285,7 +262,7 @@ size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size
/* normal FSE decoding mode */
size_t const NCountLength = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
if (FSE_isError(NCountLength)) return NCountLength;
- //if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong); /* too small input size; supposed to be already checked in NCountLength, only remaining case : NCountLength==cSrcSize */
+ /* if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong); */ /* too small input size; supposed to be already checked in NCountLength, only remaining case : NCountLength==cSrcSize */
if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
ip += NCountLength;
cSrcSize -= NCountLength;
diff --git a/zstd/lib/common/huf.h b/zstd/lib/common/huf.h
index 6b572c4..ef43268 100644
--- a/zstd/lib/common/huf.h
+++ b/zstd/lib/common/huf.h
@@ -1,35 +1,15 @@
/* ******************************************************************
- huff0 huffman codec,
- part of Finite State Entropy library
- Copyright (C) 2013-present, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ * huff0 huffman codec,
+ * part of Finite State Entropy library
+ * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
****************************************************************** */
#if defined (__cplusplus)
@@ -110,7 +90,7 @@ HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity,
/** HUF_compress4X_wksp() :
* Same as HUF_compress2(), but uses externally allocated `workSpace`.
* `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */
-#define HUF_WORKSPACE_SIZE (6 << 10)
+#define HUF_WORKSPACE_SIZE ((6 << 10) + 256)
#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32))
HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
@@ -208,6 +188,8 @@ typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */
size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */
size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
+size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
+int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
typedef enum {
HUF_repeat_none, /**< Cannot use the previous table */
@@ -246,7 +228,7 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize,
/** HUF_readCTable() :
* Loading a CTable saved with HUF_writeCTable() */
-size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
+size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights);
/** HUF_getNbBits() :
* Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX
diff --git a/zstd/lib/common/mem.h b/zstd/lib/common/mem.h
index 5da2487..89c8aea 100644
--- a/zstd/lib/common/mem.h
+++ b/zstd/lib/common/mem.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -47,6 +47,79 @@ extern "C" {
#define MEM_STATIC_ASSERT(c) { enum { MEM_static_assert = 1/(int)(!!(c)) }; }
MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
+/* detects whether we are being compiled under msan */
+#if defined (__has_feature)
+# if __has_feature(memory_sanitizer)
+# define MEMORY_SANITIZER 1
+# endif
+#endif
+
+#if defined (MEMORY_SANITIZER)
+/* Not all platforms that support msan provide sanitizers/msan_interface.h.
+ * We therefore declare the functions we need ourselves, rather than trying to
+ * include the header file... */
+
+#include <stdint.h> /* intptr_t */
+
+/* Make memory region fully initialized (without changing its contents). */
+void __msan_unpoison(const volatile void *a, size_t size);
+
+/* Make memory region fully uninitialized (without changing its contents).
+ This is a legacy interface that does not update origin information. Use
+ __msan_allocated_memory() instead. */
+void __msan_poison(const volatile void *a, size_t size);
+
+/* Returns the offset of the first (at least partially) poisoned byte in the
+ memory range, or -1 if the whole range is good. */
+intptr_t __msan_test_shadow(const volatile void *x, size_t size);
+#endif
+
+/* detects whether we are being compiled under asan */
+#if defined (__has_feature)
+# if __has_feature(address_sanitizer)
+# define ADDRESS_SANITIZER 1
+# endif
+#elif defined(__SANITIZE_ADDRESS__)
+# define ADDRESS_SANITIZER 1
+#endif
+
+#if defined (ADDRESS_SANITIZER)
+/* Not all platforms that support asan provide sanitizers/asan_interface.h.
+ * We therefore declare the functions we need ourselves, rather than trying to
+ * include the header file... */
+
+/**
+ * Marks a memory region (<c>[addr, addr+size)</c>) as unaddressable.
+ *
+ * This memory must be previously allocated by your program. Instrumented
+ * code is forbidden from accessing addresses in this region until it is
+ * unpoisoned. This function is not guaranteed to poison the entire region -
+ * it could poison only a subregion of <c>[addr, addr+size)</c> due to ASan
+ * alignment restrictions.
+ *
+ * \note This function is not thread-safe because no two threads can poison or
+ * unpoison memory in the same memory region simultaneously.
+ *
+ * \param addr Start of memory region.
+ * \param size Size of memory region. */
+void __asan_poison_memory_region(void const volatile *addr, size_t size);
+
+/**
+ * Marks a memory region (<c>[addr, addr+size)</c>) as addressable.
+ *
+ * This memory must be previously allocated by your program. Accessing
+ * addresses in this region is allowed until this region is poisoned again.
+ * This function could unpoison a super-region of <c>[addr, addr+size)</c> due
+ * to ASan alignment restrictions.
+ *
+ * \note This function is not thread-safe because no two threads can
+ * poison or unpoison memory in the same memory region simultaneously.
+ *
+ * \param addr Start of memory region.
+ * \param size Size of memory region. */
+void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
+#endif
+
/*-**************************************************************
* Basic Types
@@ -102,7 +175,7 @@ MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (size
#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
# define MEM_FORCE_MEMORY_ACCESS 2
-# elif defined(__INTEL_COMPILER) || defined(__GNUC__)
+# elif defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__)
# define MEM_FORCE_MEMORY_ACCESS 1
# endif
#endif
diff --git a/zstd/lib/common/pool.c b/zstd/lib/common/pool.c
index 7a82945..aa4b4de 100644
--- a/zstd/lib/common/pool.c
+++ b/zstd/lib/common/pool.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -127,9 +127,13 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
ctx->queueTail = 0;
ctx->numThreadsBusy = 0;
ctx->queueEmpty = 1;
- (void)ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL);
- (void)ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL);
- (void)ZSTD_pthread_cond_init(&ctx->queuePopCond, NULL);
+ {
+ int error = 0;
+ error |= ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL);
+ error |= ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL);
+ error |= ZSTD_pthread_cond_init(&ctx->queuePopCond, NULL);
+ if (error) { POOL_free(ctx); return NULL; }
+ }
ctx->shutdown = 0;
/* Allocate space for the thread handles */
ctx->threads = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), customMem);
diff --git a/zstd/lib/common/pool.h b/zstd/lib/common/pool.h
index 458d37f..259bafc 100644
--- a/zstd/lib/common/pool.h
+++ b/zstd/lib/common/pool.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -18,7 +18,7 @@ extern "C" {
#include <stddef.h> /* size_t */
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_customMem */
-#include "zstd.h"
+#include "../zstd.h"
typedef struct POOL_ctx_s POOL_ctx;
diff --git a/zstd/lib/common/threading.c b/zstd/lib/common/threading.c
index f3d4fa8..e2edb31 100644
--- a/zstd/lib/common/threading.c
+++ b/zstd/lib/common/threading.c
@@ -2,18 +2,21 @@
* Copyright (c) 2016 Tino Reichardt
* All rights reserved.
*
+ * You can contact the author at:
+ * - zstdmt source repository: https://github.com/mcmilk/zstdmt
+ *
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
- *
- * You can contact the author at:
- * - zstdmt source repository: https://github.com/mcmilk/zstdmt
+ * You may select, at your option, one of the above-listed licenses.
*/
/**
* This file will hold wrapper for systems, which do not support pthreads
*/
+#include "threading.h"
+
/* create fake symbol to avoid empty translation unit warning */
int g_ZSTD_threading_useless_symbol;
@@ -28,7 +31,6 @@ int g_ZSTD_threading_useless_symbol;
/* === Dependencies === */
#include <process.h>
#include <errno.h>
-#include "threading.h"
/* === Implementation === */
@@ -73,3 +75,47 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread, void **value_ptr)
}
#endif /* ZSTD_MULTITHREAD */
+
+#if defined(ZSTD_MULTITHREAD) && DEBUGLEVEL >= 1 && !defined(_WIN32)
+
+#include <stdlib.h>
+
+int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr)
+{
+ *mutex = (pthread_mutex_t*)malloc(sizeof(pthread_mutex_t));
+ if (!*mutex)
+ return 1;
+ return pthread_mutex_init(*mutex, attr);
+}
+
+int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex)
+{
+ if (!*mutex)
+ return 0;
+ {
+ int const ret = pthread_mutex_destroy(*mutex);
+ free(*mutex);
+ return ret;
+ }
+}
+
+int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr)
+{
+ *cond = (pthread_cond_t*)malloc(sizeof(pthread_cond_t));
+ if (!*cond)
+ return 1;
+ return pthread_cond_init(*cond, attr);
+}
+
+int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond)
+{
+ if (!*cond)
+ return 0;
+ {
+ int const ret = pthread_cond_destroy(*cond);
+ free(*cond);
+ return ret;
+ }
+}
+
+#endif
diff --git a/zstd/lib/common/threading.h b/zstd/lib/common/threading.h
index d806c89..fd0060d 100644
--- a/zstd/lib/common/threading.h
+++ b/zstd/lib/common/threading.h
@@ -2,17 +2,20 @@
* Copyright (c) 2016 Tino Reichardt
* All rights reserved.
*
+ * You can contact the author at:
+ * - zstdmt source repository: https://github.com/mcmilk/zstdmt
+ *
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
- *
- * You can contact the author at:
- * - zstdmt source repository: https://github.com/mcmilk/zstdmt
+ * You may select, at your option, one of the above-listed licenses.
*/
#ifndef THREADING_H_938743
#define THREADING_H_938743
+#include "debug.h"
+
#if defined (__cplusplus)
extern "C" {
#endif
@@ -75,10 +78,12 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr);
*/
-#elif defined(ZSTD_MULTITHREAD) /* posix assumed ; need a better detection method */
+#elif defined(ZSTD_MULTITHREAD) /* posix assumed ; need a better detection method */
/* === POSIX Systems === */
# include <pthread.h>
+#if DEBUGLEVEL < 1
+
#define ZSTD_pthread_mutex_t pthread_mutex_t
#define ZSTD_pthread_mutex_init(a, b) pthread_mutex_init((a), (b))
#define ZSTD_pthread_mutex_destroy(a) pthread_mutex_destroy((a))
@@ -96,6 +101,33 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr);
#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d))
#define ZSTD_pthread_join(a, b) pthread_join((a),(b))
+#else /* DEBUGLEVEL >= 1 */
+
+/* Debug implementation of threading.
+ * In this implementation we use pointers for mutexes and condition variables.
+ * This way, if we forget to init/destroy them the program will crash or ASAN
+ * will report leaks.
+ */
+
+#define ZSTD_pthread_mutex_t pthread_mutex_t*
+int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr);
+int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex);
+#define ZSTD_pthread_mutex_lock(a) pthread_mutex_lock(*(a))
+#define ZSTD_pthread_mutex_unlock(a) pthread_mutex_unlock(*(a))
+
+#define ZSTD_pthread_cond_t pthread_cond_t*
+int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr);
+int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond);
+#define ZSTD_pthread_cond_wait(a, b) pthread_cond_wait(*(a), *(b))
+#define ZSTD_pthread_cond_signal(a) pthread_cond_signal(*(a))
+#define ZSTD_pthread_cond_broadcast(a) pthread_cond_broadcast(*(a))
+
+#define ZSTD_pthread_t pthread_t
+#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d))
+#define ZSTD_pthread_join(a, b) pthread_join((a),(b))
+
+#endif
+
#else /* ZSTD_MULTITHREAD not defined */
/* No multithreading support */
diff --git a/zstd/lib/common/xxhash.c b/zstd/lib/common/xxhash.c
index 30599aa..597de18 100644
--- a/zstd/lib/common/xxhash.c
+++ b/zstd/lib/common/xxhash.c
@@ -1,35 +1,15 @@
/*
-* xxHash - Fast Hash algorithm
-* Copyright (C) 2012-2016, Yann Collet
-*
-* BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-*
-* Redistribution and use in source and binary forms, with or without
-* modification, are permitted provided that the following conditions are
-* met:
-*
-* * Redistributions of source code must retain the above copyright
-* notice, this list of conditions and the following disclaimer.
-* * Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following disclaimer
-* in the documentation and/or other materials provided with the
-* distribution.
-*
-* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*
-* You can contact the author at :
-* - xxHash homepage: http://www.xxhash.com
-* - xxHash source repository : https://github.com/Cyan4973/xxHash
+ * xxHash - Fast Hash algorithm
+ * Copyright (c) 2012-2020, Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - xxHash homepage: http://www.xxhash.com
+ * - xxHash source repository : https://github.com/Cyan4973/xxHash
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
*/
@@ -53,7 +33,8 @@
# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
# define XXH_FORCE_MEMORY_ACCESS 2
# elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
- (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+ (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) || \
+ defined(__ICCARM__)
# define XXH_FORCE_MEMORY_ACCESS 1
# endif
#endif
@@ -114,13 +95,13 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcp
/* *************************************
* Compiler Specific Options
***************************************/
-#if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
+#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
# define INLINE_KEYWORD inline
#else
# define INLINE_KEYWORD
#endif
-#if defined(__GNUC__)
+#if defined(__GNUC__) || defined(__ICCARM__)
# define FORCE_INLINE_ATTR __attribute__((always_inline))
#elif defined(_MSC_VER)
# define FORCE_INLINE_ATTR __forceinline
@@ -206,7 +187,12 @@ static U64 XXH_read64(const void* memPtr)
# define XXH_rotl32(x,r) _rotl(x,r)
# define XXH_rotl64(x,r) _rotl64(x,r)
#else
+#if defined(__ICCARM__)
+# include <intrinsics.h>
+# define XXH_rotl32(x,r) __ROR(x,(32 - r))
+#else
# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
+#endif
# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r)))
#endif
@@ -723,7 +709,9 @@ FORCE_INLINE_TEMPLATE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, c
state->total_len += len;
if (state->memsize + len < 32) { /* fill in tmp buffer */
- XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
+ if (input != NULL) {
+ XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
+ }
state->memsize += (U32)len;
return XXH_OK;
}
diff --git a/zstd/lib/common/xxhash.h b/zstd/lib/common/xxhash.h
index 9bad1f5..4207eba 100644
--- a/zstd/lib/common/xxhash.h
+++ b/zstd/lib/common/xxhash.h
@@ -1,35 +1,15 @@
/*
- xxHash - Extremely Fast Hash algorithm
- Header File
- Copyright (C) 2012-2016, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - xxHash source repository : https://github.com/Cyan4973/xxHash
+ * xxHash - Extremely Fast Hash algorithm
+ * Header File
+ * Copyright (c) 2012-2020, Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - xxHash source repository : https://github.com/Cyan4973/xxHash
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
*/
/* Notice extracted from xxHash homepage :
diff --git a/zstd/lib/common/zstd_common.c b/zstd/lib/common/zstd_common.c
index 667f4a2..91fe332 100644
--- a/zstd/lib/common/zstd_common.c
+++ b/zstd/lib/common/zstd_common.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
diff --git a/zstd/lib/common/zstd_errors.h b/zstd/lib/common/zstd_errors.h
index 92a3433..998398e 100644
--- a/zstd/lib/common/zstd_errors.h
+++ b/zstd/lib/common/zstd_errors.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -76,6 +76,7 @@ typedef enum {
/* following error codes are __NOT STABLE__, they can be removed or changed in future versions */
ZSTD_error_frameIndex_tooLarge = 100,
ZSTD_error_seekableIO = 102,
+ ZSTD_error_dstBuffer_wrong = 104,
ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */
} ZSTD_ErrorCode;
diff --git a/zstd/lib/common/zstd_internal.h b/zstd/lib/common/zstd_internal.h
index 31f756a..3bc7e55 100644
--- a/zstd/lib/common/zstd_internal.h
+++ b/zstd/lib/common/zstd_internal.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -19,12 +19,15 @@
/*-*************************************
* Dependencies
***************************************/
+#ifdef __aarch64__
+#include <arm_neon.h>
+#endif
#include "compiler.h"
#include "mem.h"
#include "debug.h" /* assert, DEBUGLOG, RAWLOG, g_debuglevel */
#include "error_private.h"
#define ZSTD_STATIC_LINKING_ONLY
-#include "zstd.h"
+#include "../zstd.h"
#define FSE_STATIC_LINKING_ONLY
#include "fse.h"
#define HUF_STATIC_LINKING_ONLY
@@ -34,7 +37,6 @@
#endif
#include "xxhash.h" /* XXH_reset, update, digest */
-
#if defined (__cplusplus)
extern "C" {
#endif
@@ -55,15 +57,42 @@ extern "C" {
#define MAX(a,b) ((a)>(b) ? (a) : (b))
/**
+ * Ignore: this is an internal helper.
+ *
+ * This is a helper function to help force C99-correctness during compilation.
+ * Under strict compilation modes, variadic macro arguments can't be empty.
+ * However, variadic function arguments can be. Using a function therefore lets
+ * us statically check that at least one (string) argument was passed,
+ * independent of the compilation flags.
+ */
+static INLINE_KEYWORD UNUSED_ATTR
+void _force_has_format_string(const char *format, ...) {
+ (void)format;
+}
+
+/**
+ * Ignore: this is an internal helper.
+ *
+ * We want to force this function invocation to be syntactically correct, but
+ * we don't want to force runtime evaluation of its arguments.
+ */
+#define _FORCE_HAS_FORMAT_STRING(...) \
+ if (0) { \
+ _force_has_format_string(__VA_ARGS__); \
+ }
+
+/**
* Return the specified error if the condition evaluates to true.
*
- * In debug modes, prints additional information. In order to do that
- * (particularly, printing the conditional that failed), this can't just wrap
- * RETURN_ERROR().
+ * In debug modes, prints additional information.
+ * In order to do that (particularly, printing the conditional that failed),
+ * this can't just wrap RETURN_ERROR().
*/
#define RETURN_ERROR_IF(cond, err, ...) \
if (cond) { \
- RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \
+ RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
+ __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \
+ _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
RAWLOG(3, ": " __VA_ARGS__); \
RAWLOG(3, "\n"); \
return ERROR(err); \
@@ -76,7 +105,9 @@ extern "C" {
*/
#define RETURN_ERROR(err, ...) \
do { \
- RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \
+ RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
+ __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \
+ _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
RAWLOG(3, ": " __VA_ARGS__); \
RAWLOG(3, "\n"); \
return ERROR(err); \
@@ -91,7 +122,9 @@ extern "C" {
do { \
size_t const err_code = (err); \
if (ERR_isError(err_code)) { \
- RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \
+ RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
+ __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \
+ _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
RAWLOG(3, ": " __VA_ARGS__); \
RAWLOG(3, "\n"); \
return err_code; \
@@ -129,6 +162,8 @@ static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;
+#define ZSTD_FRAMECHECKSUMSIZE 4
+
#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */
@@ -192,32 +227,99 @@ static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
/*-*******************************************
* Shared functions to include for inlining
*********************************************/
-static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
+static void ZSTD_copy8(void* dst, const void* src) {
+#ifdef __aarch64__
+ vst1_u8((uint8_t*)dst, vld1_u8((const uint8_t*)src));
+#else
+ memcpy(dst, src, 8);
+#endif
+}
+
#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
+static void ZSTD_copy16(void* dst, const void* src) {
+#ifdef __aarch64__
+ vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src));
+#else
+ memcpy(dst, src, 16);
+#endif
+}
+#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
+
+#define WILDCOPY_OVERLENGTH 32
+#define WILDCOPY_VECLEN 16
+
+typedef enum {
+ ZSTD_no_overlap,
+ ZSTD_overlap_src_before_dst
+ /* ZSTD_overlap_dst_before_src, */
+} ZSTD_overlap_e;
/*! ZSTD_wildcopy() :
- * custom version of memcpy(), can overwrite up to WILDCOPY_OVERLENGTH bytes (if length==0) */
-#define WILDCOPY_OVERLENGTH 8
-MEM_STATIC void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length)
+ * Custom version of memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0)
+ * @param ovtype controls the overlap detection
+ * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
+ * - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart.
+ * The src buffer must be before the dst buffer.
+ */
+MEM_STATIC FORCE_INLINE_ATTR
+void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype)
{
+ ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
const BYTE* ip = (const BYTE*)src;
BYTE* op = (BYTE*)dst;
BYTE* const oend = op + length;
- do
- COPY8(op, ip)
- while (op < oend);
+
+ assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN));
+
+ if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {
+ /* Handle short offset copies. */
+ do {
+ COPY8(op, ip)
+ } while (op < oend);
+ } else {
+ assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN);
+ /* Separate out the first COPY16() call because the copy length is
+ * almost certain to be short, so the branches have different
+ * probabilities. Since it is almost certain to be short, only do
+ * one COPY16() in the first call. Then, do two calls per loop since
+ * at that point it is more likely to have a high trip count.
+ */
+#ifndef __aarch64__
+ do {
+ COPY16(op, ip);
+ }
+ while (op < oend);
+#else
+ COPY16(op, ip);
+ if (op >= oend) return;
+ do {
+ COPY16(op, ip);
+ COPY16(op, ip);
+ }
+ while (op < oend);
+#endif
+ }
}
-MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd) /* should be faster for decoding, but strangely, not verified on all platform */
+MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
- const BYTE* ip = (const BYTE*)src;
- BYTE* op = (BYTE*)dst;
- BYTE* const oend = (BYTE*)dstEnd;
- do
- COPY8(op, ip)
- while (op < oend);
+ size_t const length = MIN(dstCapacity, srcSize);
+ if (length > 0) {
+ memcpy(dst, src, length);
+ }
+ return length;
}
+/* define "workspace is too large" as this number of times larger than needed */
+#define ZSTD_WORKSPACETOOLARGE_FACTOR 3
+
+/* when workspace is continuously too large
+ * during at least this number of times,
+ * context's memory usage is considered wasteful,
+ * because it's sized to handle a worst case scenario which rarely happens.
+ * In which case, resize it down to free some memory */
+#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128
+
/*-*******************************************
* Private declarations
@@ -242,6 +344,31 @@ typedef struct {
U32 longLengthPos;
} seqStore_t;
+typedef struct {
+ U32 litLength;
+ U32 matchLength;
+} ZSTD_sequenceLength;
+
+/**
+ * Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences
+ * indicated by longLengthPos and longLengthID, and adds MINMATCH back to matchLength.
+ */
+MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq)
+{
+ ZSTD_sequenceLength seqLen;
+ seqLen.litLength = seq->litLength;
+ seqLen.matchLength = seq->matchLength + MINMATCH;
+ if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) {
+ if (seqStore->longLengthID == 1) {
+ seqLen.litLength += 0xFFFF;
+ }
+ if (seqStore->longLengthID == 2) {
+ seqLen.matchLength += 0xFFFF;
+ }
+ }
+ return seqLen;
+}
+
/**
* Contains the compressed frame size and an upper-bound for the decompressed frame size.
* Note: before using `compressedSize`, check for errors using ZSTD_isError().
@@ -268,10 +395,11 @@ MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus
{
# if defined(_MSC_VER) /* Visual */
unsigned long r=0;
- _BitScanReverse(&r, val);
- return (unsigned)r;
+ return _BitScanReverse(&r, val) ? (unsigned)r : 0;
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
- return 31 - __builtin_clz(val);
+ return __builtin_clz (val) ^ 31;
+# elif defined(__ICCARM__) /* IAR Intrinsic */
+ return 31 - __CLZ(val);
# else /* Software version */
static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
U32 v = val;
diff --git a/zstd/lib/compress/fse_compress.c b/zstd/lib/compress/fse_compress.c
index 68b47e1..a427598 100644
--- a/zstd/lib/compress/fse_compress.c
+++ b/zstd/lib/compress/fse_compress.c
@@ -1,35 +1,15 @@
/* ******************************************************************
- FSE : Finite State Entropy encoder
- Copyright (C) 2013-present, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ * FSE : Finite State Entropy encoder
+ * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ * - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
****************************************************************** */
/* **************************************************************
@@ -37,14 +17,14 @@
****************************************************************/
#include <stdlib.h> /* malloc, free, qsort */
#include <string.h> /* memcpy, memset */
-#include "compiler.h"
-#include "mem.h" /* U32, U16, etc. */
-#include "debug.h" /* assert, DEBUGLOG */
+#include "../common/compiler.h"
+#include "../common/mem.h" /* U32, U16, etc. */
+#include "../common/debug.h" /* assert, DEBUGLOG */
#include "hist.h" /* HIST_count_wksp */
-#include "bitstream.h"
+#include "../common/bitstream.h"
#define FSE_STATIC_LINKING_ONLY
-#include "fse.h"
-#include "error_private.h"
+#include "../common/fse.h"
+#include "../common/error_private.h"
/* **************************************************************
@@ -645,9 +625,6 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
-#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
-#define CHECK_F(f) { CHECK_V_F(_var_err__, f); }
-
/* FSE_compress_wksp() :
* Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
* `wkspSize` size must be `(1<<tableLog)`.
diff --git a/zstd/lib/compress/hist.c b/zstd/lib/compress/hist.c
index 45b7bab..61e08c7 100644
--- a/zstd/lib/compress/hist.c
+++ b/zstd/lib/compress/hist.c
@@ -1,42 +1,22 @@
/* ******************************************************************
- hist : Histogram functions
- part of Finite State Entropy project
- Copyright (C) 2013-present, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ * hist : Histogram functions
+ * part of Finite State Entropy project
+ * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ * - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
****************************************************************** */
/* --- dependencies --- */
-#include "mem.h" /* U32, BYTE, etc. */
-#include "debug.h" /* assert, DEBUGLOG */
-#include "error_private.h" /* ERROR */
+#include "../common/mem.h" /* U32, BYTE, etc. */
+#include "../common/debug.h" /* assert, DEBUGLOG */
+#include "../common/error_private.h" /* ERROR */
#include "hist.h"
diff --git a/zstd/lib/compress/hist.h b/zstd/lib/compress/hist.h
index 8b38935..77e3ec4 100644
--- a/zstd/lib/compress/hist.h
+++ b/zstd/lib/compress/hist.h
@@ -1,36 +1,16 @@
/* ******************************************************************
- hist : Histogram functions
- part of Finite State Entropy project
- Copyright (C) 2013-present, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ * hist : Histogram functions
+ * part of Finite State Entropy project
+ * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ * - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
****************************************************************** */
/* --- dependencies --- */
diff --git a/zstd/lib/compress/huf_compress.c b/zstd/lib/compress/huf_compress.c
index f074f1e..5468798 100644
--- a/zstd/lib/compress/huf_compress.c
+++ b/zstd/lib/compress/huf_compress.c
@@ -1,35 +1,15 @@
/* ******************************************************************
- Huffman encoder, part of New Generation Entropy library
- Copyright (C) 2013-2016, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ * Huffman encoder, part of New Generation Entropy library
+ * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ * - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
****************************************************************** */
/* **************************************************************
@@ -45,14 +25,14 @@
****************************************************************/
#include <string.h> /* memcpy, memset */
#include <stdio.h> /* printf (debug) */
-#include "compiler.h"
-#include "bitstream.h"
+#include "../common/compiler.h"
+#include "../common/bitstream.h"
#include "hist.h"
#define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */
-#include "fse.h" /* header compression */
+#include "../common/fse.h" /* header compression */
#define HUF_STATIC_LINKING_ONLY
-#include "huf.h"
-#include "error_private.h"
+#include "../common/huf.h"
+#include "../common/error_private.h"
/* **************************************************************
@@ -60,8 +40,6 @@
****************************************************************/
#define HUF_isError ERR_isError
#define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */
-#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
-#define CHECK_F(f) { CHECK_V_F(_var_err__, f); }
/* **************************************************************
@@ -110,18 +88,18 @@ static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weight
CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) );
/* Write table description header */
- { CHECK_V_F(hSize, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
+ { CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), norm, maxSymbolValue, tableLog) );
op += hSize;
}
/* Compress */
CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) );
- { CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, weightTable, wtSize, CTable) );
+ { CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, CTable) );
if (cSize == 0) return 0; /* not enough space for compressed data */
op += cSize;
}
- return op-ostart;
+ return (size_t)(op-ostart);
}
@@ -169,7 +147,7 @@ size_t HUF_writeCTable (void* dst, size_t maxDstSize,
}
-size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize)
+size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights)
{
BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; /* init not required, even though some static analyzer may complain */
U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */
@@ -192,9 +170,11 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
} }
/* fill nbBits */
+ *hasZeroWeights = 0;
{ U32 n; for (n=0; n<nbSymbols; n++) {
const U32 w = huffWeight[n];
- CTable[n].nbBits = (BYTE)(tableLog + 1 - w);
+ *hasZeroWeights |= (w == 0);
+ CTable[n].nbBits = (BYTE)(tableLog + 1 - w) & -(w != 0);
} }
/* fill val */
@@ -240,7 +220,7 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
/* there are several too large elements (at least >= 2) */
{ int totalCost = 0;
const U32 baseCost = 1 << (largestBits - maxNbBits);
- U32 n = lastNonNull;
+ int n = (int)lastNonNull;
while (huffNode[n].nbBits > maxNbBits) {
totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
@@ -255,22 +235,22 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
/* repay normalized cost */
{ U32 const noSymbol = 0xF0F0F0F0;
U32 rankLast[HUF_TABLELOG_MAX+2];
- int pos;
/* Get pos of last (smallest) symbol per rank */
memset(rankLast, 0xF0, sizeof(rankLast));
{ U32 currentNbBits = maxNbBits;
+ int pos;
for (pos=n ; pos >= 0; pos--) {
if (huffNode[pos].nbBits >= currentNbBits) continue;
currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */
- rankLast[maxNbBits-currentNbBits] = pos;
+ rankLast[maxNbBits-currentNbBits] = (U32)pos;
} }
while (totalCost > 0) {
- U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1;
+ U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1;
for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
- U32 highPos = rankLast[nBitsToDecrease];
- U32 lowPos = rankLast[nBitsToDecrease-1];
+ U32 const highPos = rankLast[nBitsToDecrease];
+ U32 const lowPos = rankLast[nBitsToDecrease-1];
if (highPos == noSymbol) continue;
if (lowPos == noSymbol) break;
{ U32 const highTotal = huffNode[highPos].count;
@@ -297,7 +277,8 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */
while (huffNode[n].nbBits == maxNbBits) n--;
huffNode[n+1].nbBits--;
- rankLast[1] = n+1;
+ assert(n >= 0);
+ rankLast[1] = (U32)(n+1);
totalCost++;
continue;
}
@@ -309,29 +290,36 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
return maxNbBits;
}
-
typedef struct {
U32 base;
U32 current;
} rankPos;
-static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue)
+typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
+
+#define RANK_POSITION_TABLE_SIZE 32
+
+typedef struct {
+ huffNodeTable huffNodeTbl;
+ rankPos rankPosition[RANK_POSITION_TABLE_SIZE];
+} HUF_buildCTable_wksp_tables;
+
+static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue, rankPos* rankPosition)
{
- rankPos rank[32];
U32 n;
- memset(rank, 0, sizeof(rank));
+ memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
for (n=0; n<=maxSymbolValue; n++) {
U32 r = BIT_highbit32(count[n] + 1);
- rank[r].base ++;
+ rankPosition[r].base ++;
}
- for (n=30; n>0; n--) rank[n-1].base += rank[n].base;
- for (n=0; n<32; n++) rank[n].current = rank[n].base;
+ for (n=30; n>0; n--) rankPosition[n-1].base += rankPosition[n].base;
+ for (n=0; n<32; n++) rankPosition[n].current = rankPosition[n].base;
for (n=0; n<=maxSymbolValue; n++) {
U32 const c = count[n];
U32 const r = BIT_highbit32(c+1) + 1;
- U32 pos = rank[r].current++;
- while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) {
+ U32 pos = rankPosition[r].current++;
+ while ((pos > rankPosition[r].base) && (c > huffNode[pos-1].count)) {
huffNode[pos] = huffNode[pos-1];
pos--;
}
@@ -343,45 +331,48 @@ static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValu
/** HUF_buildCTable_wksp() :
* Same as HUF_buildCTable(), but using externally allocated scratch buffer.
- * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of HUF_CTABLE_WORKSPACE_SIZE_U32 unsigned.
+ * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables).
*/
#define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
-typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
+
size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
{
- nodeElt* const huffNode0 = (nodeElt*)workSpace;
+ HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)workSpace;
+ nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
nodeElt* const huffNode = huffNode0+1;
- U32 n, nonNullRank;
+ int nonNullRank;
int lowS, lowN;
- U16 nodeNb = STARTNODE;
- U32 nodeRoot;
+ int nodeNb = STARTNODE;
+ int n, nodeRoot;
/* safety checks */
if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
- if (wkspSize < sizeof(huffNodeTable)) return ERROR(workSpace_tooSmall);
+ if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
+ return ERROR(workSpace_tooSmall);
if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
- if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
+ if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
+ return ERROR(maxSymbolValue_tooLarge);
memset(huffNode0, 0, sizeof(huffNodeTable));
/* sort, decreasing order */
- HUF_sort(huffNode, count, maxSymbolValue);
+ HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);
/* init for parents */
- nonNullRank = maxSymbolValue;
+ nonNullRank = (int)maxSymbolValue;
while(huffNode[nonNullRank].count == 0) nonNullRank--;
lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
- huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb;
+ huffNode[lowS].parent = huffNode[lowS-1].parent = (U16)nodeNb;
nodeNb++; lowS-=2;
for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
huffNode0[0].count = (U32)(1U<<31); /* fake entry, strong barrier */
/* create parents */
while (nodeNb <= nodeRoot) {
- U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
- U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
+ int const n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
+ int const n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
- huffNode[n1].parent = huffNode[n2].parent = nodeNb;
+ huffNode[n1].parent = huffNode[n2].parent = (U16)nodeNb;
nodeNb++;
}
@@ -393,24 +384,25 @@ size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbo
huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
/* enforce maxTableLog */
- maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits);
+ maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
/* fill result into tree (val, nbBits) */
{ U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
+ int const alphabetSize = (int)(maxSymbolValue + 1);
if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */
for (n=0; n<=nonNullRank; n++)
nbPerRank[huffNode[n].nbBits]++;
/* determine stating value per rank */
{ U16 min = 0;
- for (n=maxNbBits; n>0; n--) {
+ for (n=(int)maxNbBits; n>0; n--) {
valPerRank[n] = min; /* get starting value within each rank */
min += nbPerRank[n];
min >>= 1;
} }
- for (n=0; n<=maxSymbolValue; n++)
+ for (n=0; n<alphabetSize; n++)
tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */
- for (n=0; n<=maxSymbolValue; n++)
+ for (n=0; n<alphabetSize; n++)
tree[n].val = valPerRank[tree[n].nbBits]++; /* assign value within rank, symbol order */
}
@@ -423,11 +415,11 @@ size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbo
*/
size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits)
{
- huffNodeTable nodeTable;
- return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable));
+ HUF_buildCTable_wksp_tables workspace;
+ return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, &workspace, sizeof(workspace));
}
-static size_t HUF_estimateCompressedSize(HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
+size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
{
size_t nbBits = 0;
int s;
@@ -437,7 +429,7 @@ static size_t HUF_estimateCompressedSize(HUF_CElt* CTable, const unsigned* count
return nbBits >> 3;
}
-static int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
+int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
int bad = 0;
int s;
for (s = 0; s <= (int)maxSymbolValue; ++s) {
@@ -476,7 +468,7 @@ HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
/* init */
if (dstSize < 8) return 0; /* not enough space to compress */
- { size_t const initErr = BIT_initCStream(&bitC, op, oend-op);
+ { size_t const initErr = BIT_initCStream(&bitC, op, (size_t)(oend-op));
if (HUF_isError(initErr)) return 0; }
n = srcSize & ~3; /* join to mod 4 */
@@ -573,7 +565,8 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
if (srcSize < 12) return 0; /* no saving possible : too small input */
op += 6; /* jumpTable */
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
+ assert(op <= oend);
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
if (cSize==0) return 0;
assert(cSize <= 65535);
MEM_writeLE16(ostart, (U16)cSize);
@@ -581,7 +574,8 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
}
ip += segmentSize;
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
+ assert(op <= oend);
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
if (cSize==0) return 0;
assert(cSize <= 65535);
MEM_writeLE16(ostart+2, (U16)cSize);
@@ -589,7 +583,8 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
}
ip += segmentSize;
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
+ assert(op <= oend);
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
if (cSize==0) return 0;
assert(cSize <= 65535);
MEM_writeLE16(ostart+4, (U16)cSize);
@@ -597,12 +592,14 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
}
ip += segmentSize;
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, iend-ip, CTable, bmi2) );
+ assert(op <= oend);
+ assert(ip <= iend);
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) );
if (cSize==0) return 0;
op += cSize;
}
- return op-ostart;
+ return (size_t)(op-ostart);
}
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
@@ -618,20 +615,21 @@ static size_t HUF_compressCTable_internal(
HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2)
{
size_t const cSize = (nbStreams==HUF_singleStream) ?
- HUF_compress1X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2) :
- HUF_compress4X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2);
+ HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) :
+ HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2);
if (HUF_isError(cSize)) { return cSize; }
if (cSize==0) { return 0; } /* uncompressible */
op += cSize;
/* check compressibility */
+ assert(op >= ostart);
if ((size_t)(op-ostart) >= srcSize-1) { return 0; }
- return op-ostart;
+ return (size_t)(op-ostart);
}
typedef struct {
unsigned count[HUF_SYMBOLVALUE_MAX + 1];
HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
- huffNodeTable nodeTable;
+ HUF_buildCTable_wksp_tables buildCTable_wksp;
} HUF_compress_tables_t;
/* HUF_compress_internal() :
@@ -650,6 +648,8 @@ HUF_compress_internal (void* dst, size_t dstSize,
BYTE* const oend = ostart + dstSize;
BYTE* op = ostart;
+ HUF_STATIC_ASSERT(sizeof(*table) <= HUF_WORKSPACE_SIZE);
+
/* checks & inits */
if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall);
@@ -691,7 +691,7 @@ HUF_compress_internal (void* dst, size_t dstSize,
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
{ size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
maxSymbolValue, huffLog,
- table->nodeTable, sizeof(table->nodeTable));
+ &table->buildCTable_wksp, sizeof(table->buildCTable_wksp));
CHECK_F(maxBits);
huffLog = (U32)maxBits;
/* Zero unused symbols in CTable, so we can check it for validity */
diff --git a/zstd/lib/compress/zstd_compress.c b/zstd/lib/compress/zstd_compress.c
index 2e163c8..3f963b1 100644
--- a/zstd/lib/compress/zstd_compress.c
+++ b/zstd/lib/compress/zstd_compress.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -13,24 +13,34 @@
***************************************/
#include <limits.h> /* INT_MAX */
#include <string.h> /* memset */
-#include "cpu.h"
-#include "mem.h"
+#include "../common/cpu.h"
+#include "../common/mem.h"
#include "hist.h" /* HIST_countFast_wksp */
#define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */
-#include "fse.h"
+#include "../common/fse.h"
#define HUF_STATIC_LINKING_ONLY
-#include "huf.h"
+#include "../common/huf.h"
#include "zstd_compress_internal.h"
+#include "zstd_compress_sequences.h"
+#include "zstd_compress_literals.h"
#include "zstd_fast.h"
#include "zstd_double_fast.h"
#include "zstd_lazy.h"
#include "zstd_opt.h"
#include "zstd_ldm.h"
+#include "zstd_compress_superblock.h"
/*-*************************************
* Helper functions
***************************************/
+/* ZSTD_compressBound()
+ * Note that the result from this function is only compatible with the "normal"
+ * full-block strategy.
+ * When there are a lot of small blocks due to frequent flush in streaming mode
+ * the overhead of headers can make the compressed data to be larger than the
+ * return value of ZSTD_compressBound().
+ */
size_t ZSTD_compressBound(size_t srcSize) {
return ZSTD_COMPRESSBOUND(srcSize);
}
@@ -40,15 +50,15 @@ size_t ZSTD_compressBound(size_t srcSize) {
* Context memory management
***************************************/
struct ZSTD_CDict_s {
- void* dictBuffer;
const void* dictContent;
size_t dictContentSize;
- void* workspace;
- size_t workspaceSize;
+ U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */
+ ZSTD_cwksp workspace;
ZSTD_matchState_t matchState;
ZSTD_compressedBlockState_t cBlockState;
ZSTD_customMem customMem;
U32 dictID;
+ int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */
}; /* typedef'd to ZSTD_CDict within "zstd.h" */
ZSTD_CCtx* ZSTD_createCCtx(void)
@@ -80,25 +90,26 @@ ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem)
}
}
-ZSTD_CCtx* ZSTD_initStaticCCtx(void *workspace, size_t workspaceSize)
+ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize)
{
- ZSTD_CCtx* const cctx = (ZSTD_CCtx*) workspace;
+ ZSTD_cwksp ws;
+ ZSTD_CCtx* cctx;
if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL; /* minimum size */
if ((size_t)workspace & 7) return NULL; /* must be 8-aligned */
- memset(workspace, 0, workspaceSize); /* may be a bit generous, could memset be smaller ? */
+ ZSTD_cwksp_init(&ws, workspace, workspaceSize);
+
+ cctx = (ZSTD_CCtx*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CCtx));
+ if (cctx == NULL) return NULL;
+
+ memset(cctx, 0, sizeof(ZSTD_CCtx));
+ ZSTD_cwksp_move(&cctx->workspace, &ws);
cctx->staticSize = workspaceSize;
- cctx->workSpace = (void*)(cctx+1);
- cctx->workSpaceSize = workspaceSize - sizeof(ZSTD_CCtx);
/* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */
- if (cctx->workSpaceSize < HUF_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t)) return NULL;
- assert(((size_t)cctx->workSpace & (sizeof(void*)-1)) == 0); /* ensure correct alignment */
- cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)cctx->workSpace;
- cctx->blockState.nextCBlock = cctx->blockState.prevCBlock + 1;
- {
- void* const ptr = cctx->blockState.nextCBlock + 1;
- cctx->entropyWorkspace = (U32*)ptr;
- }
+ if (!ZSTD_cwksp_check_available(&cctx->workspace, HUF_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL;
+ cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));
+ cctx->blockState.nextCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));
+ cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cctx->workspace, HUF_WORKSPACE_SIZE);
cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
return cctx;
}
@@ -126,11 +137,11 @@ static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx)
{
assert(cctx != NULL);
assert(cctx->staticSize == 0);
- ZSTD_free(cctx->workSpace, cctx->customMem); cctx->workSpace = NULL;
ZSTD_clearAllDicts(cctx);
#ifdef ZSTD_MULTITHREAD
ZSTDMT_freeCCtx(cctx->mtctx); cctx->mtctx = NULL;
#endif
+ ZSTD_cwksp_free(&cctx->workspace, cctx->customMem);
}
size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
@@ -138,8 +149,13 @@ size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
if (cctx==NULL) return 0; /* support free on NULL */
RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
"not compatible with static CCtx");
- ZSTD_freeCCtxContent(cctx);
- ZSTD_free(cctx, cctx->customMem);
+ {
+ int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx);
+ ZSTD_freeCCtxContent(cctx);
+ if (!cctxInWorkspace) {
+ ZSTD_free(cctx, cctx->customMem);
+ }
+ }
return 0;
}
@@ -158,7 +174,9 @@ static size_t ZSTD_sizeof_mtctx(const ZSTD_CCtx* cctx)
size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx)
{
if (cctx==NULL) return 0; /* support sizeof on NULL */
- return sizeof(*cctx) + cctx->workSpaceSize
+ /* cctx may be in the workspace */
+ return (cctx->workspace.workspace == cctx ? 0 : sizeof(*cctx))
+ + ZSTD_cwksp_sizeof(&cctx->workspace)
+ ZSTD_sizeof_localDict(cctx->localDict)
+ ZSTD_sizeof_mtctx(cctx);
}
@@ -215,7 +233,7 @@ size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params)
}
size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) {
- RETURN_ERROR_IF(!cctxParams, GENERIC);
+ RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!");
memset(cctxParams, 0, sizeof(*cctxParams));
cctxParams->compressionLevel = compressionLevel;
cctxParams->fParams.contentSizeFlag = 1;
@@ -224,26 +242,26 @@ size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel)
size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params)
{
- RETURN_ERROR_IF(!cctxParams, GENERIC);
- FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) );
+ RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!");
+ FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , "");
memset(cctxParams, 0, sizeof(*cctxParams));
+ assert(!ZSTD_checkCParams(params.cParams));
cctxParams->cParams = params.cParams;
cctxParams->fParams = params.fParams;
cctxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */
- assert(!ZSTD_checkCParams(params.cParams));
return 0;
}
/* ZSTD_assignParamsToCCtxParams() :
* params is presumed valid at this stage */
static ZSTD_CCtx_params ZSTD_assignParamsToCCtxParams(
- ZSTD_CCtx_params cctxParams, ZSTD_parameters params)
+ const ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params)
{
- ZSTD_CCtx_params ret = cctxParams;
- ret.cParams = params.cParams;
- ret.fParams = params.fParams;
+ ZSTD_CCtx_params ret = *cctxParams;
+ assert(!ZSTD_checkCParams(params->cParams));
+ ret.cParams = params->cParams;
+ ret.fParams = params->fParams;
ret.compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */
- assert(!ZSTD_checkCParams(params.cParams));
return ret;
}
@@ -327,8 +345,13 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
return bounds;
case ZSTD_c_overlapLog:
+#ifdef ZSTD_MULTITHREAD
bounds.lowerBound = ZSTD_OVERLAPLOG_MIN;
bounds.upperBound = ZSTD_OVERLAPLOG_MAX;
+#else
+ bounds.lowerBound = 0;
+ bounds.upperBound = 0;
+#endif
return bounds;
case ZSTD_c_enableLongDistanceMatching:
@@ -376,7 +399,7 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
case ZSTD_c_forceAttachDict:
ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceCopy);
bounds.lowerBound = ZSTD_dictDefaultAttach;
- bounds.upperBound = ZSTD_dictForceCopy; /* note : how to ensure at compile time that this is the highest value enum ? */
+ bounds.upperBound = ZSTD_dictForceLoad; /* note : how to ensure at compile time that this is the highest value enum ? */
return bounds;
case ZSTD_c_literalCompressionMode:
@@ -385,25 +408,22 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
bounds.upperBound = ZSTD_lcm_uncompressed;
return bounds;
+ case ZSTD_c_targetCBlockSize:
+ bounds.lowerBound = ZSTD_TARGETCBLOCKSIZE_MIN;
+ bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX;
+ return bounds;
+
+ case ZSTD_c_srcSizeHint:
+ bounds.lowerBound = ZSTD_SRCSIZEHINT_MIN;
+ bounds.upperBound = ZSTD_SRCSIZEHINT_MAX;
+ return bounds;
+
default:
- { ZSTD_bounds const boundError = { ERROR(parameter_unsupported), 0, 0 };
- return boundError;
- }
+ bounds.error = ERROR(parameter_unsupported);
+ return bounds;
}
}
-/* ZSTD_cParam_withinBounds:
- * @return 1 if value is within cParam bounds,
- * 0 otherwise */
-static int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
-{
- ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
- if (ZSTD_isError(bounds.error)) return 0;
- if (value < bounds.lowerBound) return 0;
- if (value > bounds.upperBound) return 0;
- return 1;
-}
-
/* ZSTD_cParam_clampBounds:
* Clamps the value into the bounded range.
*/
@@ -418,7 +438,7 @@ static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value)
#define BOUNDCHECK(cParam, val) { \
RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \
- parameter_outOfBound); \
+ parameter_outOfBound, "Param out of bounds"); \
}
@@ -452,6 +472,8 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
case ZSTD_c_ldmHashRateLog:
case ZSTD_c_forceAttachDict:
case ZSTD_c_literalCompressionMode:
+ case ZSTD_c_targetCBlockSize:
+ case ZSTD_c_srcSizeHint:
default:
return 0;
}
@@ -464,7 +486,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
if (ZSTD_isUpdateAuthorized(param)) {
cctx->cParamsChanged = 1;
} else {
- RETURN_ERROR(stage_wrong);
+ RETURN_ERROR(stage_wrong, "can only set params in ctx init stage");
} }
switch(param)
@@ -497,9 +519,11 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
case ZSTD_c_ldmHashLog:
case ZSTD_c_ldmMinMatch:
case ZSTD_c_ldmBucketSizeLog:
+ case ZSTD_c_targetCBlockSize:
+ case ZSTD_c_srcSizeHint:
break;
- default: RETURN_ERROR(parameter_unsupported);
+ default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
}
return ZSTD_CCtxParams_setParameter(&cctx->requestedParams, param, value);
}
@@ -516,37 +540,37 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
return (size_t)CCtxParams->format;
case ZSTD_c_compressionLevel : {
- FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value));
+ FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), "");
if (value) { /* 0 : does not change current level */
CCtxParams->compressionLevel = value;
}
- if (CCtxParams->compressionLevel >= 0) return CCtxParams->compressionLevel;
+ if (CCtxParams->compressionLevel >= 0) return (size_t)CCtxParams->compressionLevel;
return 0; /* return type (size_t) cannot represent negative values */
}
case ZSTD_c_windowLog :
if (value!=0) /* 0 => use default */
BOUNDCHECK(ZSTD_c_windowLog, value);
- CCtxParams->cParams.windowLog = value;
+ CCtxParams->cParams.windowLog = (U32)value;
return CCtxParams->cParams.windowLog;
case ZSTD_c_hashLog :
if (value!=0) /* 0 => use default */
BOUNDCHECK(ZSTD_c_hashLog, value);
- CCtxParams->cParams.hashLog = value;
+ CCtxParams->cParams.hashLog = (U32)value;
return CCtxParams->cParams.hashLog;
case ZSTD_c_chainLog :
if (value!=0) /* 0 => use default */
BOUNDCHECK(ZSTD_c_chainLog, value);
- CCtxParams->cParams.chainLog = value;
+ CCtxParams->cParams.chainLog = (U32)value;
return CCtxParams->cParams.chainLog;
case ZSTD_c_searchLog :
if (value!=0) /* 0 => use default */
BOUNDCHECK(ZSTD_c_searchLog, value);
- CCtxParams->cParams.searchLog = value;
- return value;
+ CCtxParams->cParams.searchLog = (U32)value;
+ return (size_t)value;
case ZSTD_c_minMatch :
if (value!=0) /* 0 => use default */
@@ -604,7 +628,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
return 0;
#else
- FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value));
+ FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), "");
CCtxParams->nbWorkers = value;
return CCtxParams->nbWorkers;
#endif
@@ -617,7 +641,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
/* Adjust to the minimum non-default value. */
if (value != 0 && value < ZSTDMT_JOBSIZE_MIN)
value = ZSTDMT_JOBSIZE_MIN;
- FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value));
+ FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), "");
assert(value >= 0);
CCtxParams->jobSize = value;
return CCtxParams->jobSize;
@@ -628,7 +652,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
return 0;
#else
- FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value));
+ FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), "");
CCtxParams->overlapLog = value;
return CCtxParams->overlapLog;
#endif
@@ -638,7 +662,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
return 0;
#else
- FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value));
+ FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), "");
CCtxParams->rsyncable = value;
return CCtxParams->rsyncable;
#endif
@@ -667,10 +691,22 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
case ZSTD_c_ldmHashRateLog :
RETURN_ERROR_IF(value > ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN,
- parameter_outOfBound);
+ parameter_outOfBound, "Param out of bounds!");
CCtxParams->ldmParams.hashRateLog = value;
return CCtxParams->ldmParams.hashRateLog;
+ case ZSTD_c_targetCBlockSize :
+ if (value!=0) /* 0 ==> default */
+ BOUNDCHECK(ZSTD_c_targetCBlockSize, value);
+ CCtxParams->targetCBlockSize = value;
+ return CCtxParams->targetCBlockSize;
+
+ case ZSTD_c_srcSizeHint :
+ if (value!=0) /* 0 ==> default */
+ BOUNDCHECK(ZSTD_c_srcSizeHint, value);
+ CCtxParams->srcSizeHint = value;
+ return CCtxParams->srcSizeHint;
+
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
}
}
@@ -692,13 +728,13 @@ size_t ZSTD_CCtxParams_getParameter(
*value = CCtxParams->compressionLevel;
break;
case ZSTD_c_windowLog :
- *value = CCtxParams->cParams.windowLog;
+ *value = (int)CCtxParams->cParams.windowLog;
break;
case ZSTD_c_hashLog :
- *value = CCtxParams->cParams.hashLog;
+ *value = (int)CCtxParams->cParams.hashLog;
break;
case ZSTD_c_chainLog :
- *value = CCtxParams->cParams.chainLog;
+ *value = (int)CCtxParams->cParams.chainLog;
break;
case ZSTD_c_searchLog :
*value = CCtxParams->cParams.searchLog;
@@ -773,6 +809,12 @@ size_t ZSTD_CCtxParams_getParameter(
case ZSTD_c_ldmHashRateLog :
*value = CCtxParams->ldmParams.hashRateLog;
break;
+ case ZSTD_c_targetCBlockSize :
+ *value = (int)CCtxParams->targetCBlockSize;
+ break;
+ case ZSTD_c_srcSizeHint :
+ *value = (int)CCtxParams->srcSizeHint;
+ break;
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
}
return 0;
@@ -789,8 +831,11 @@ size_t ZSTD_CCtx_setParametersUsingCCtxParams(
ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params)
{
DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams");
- RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong);
- RETURN_ERROR_IF(cctx->cdict, stage_wrong);
+ RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
+ "The context is in the wrong stage!");
+ RETURN_ERROR_IF(cctx->cdict, stage_wrong,
+ "Can't override parameters with cdict attached (some must "
+ "be inherited from the cdict).");
cctx->requestedParams = *params;
return 0;
@@ -799,7 +844,8 @@ size_t ZSTD_CCtx_setParametersUsingCCtxParams(
ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize)
{
DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize);
- RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong);
+ RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
+ "Can't set pledgedSrcSize when not in init stage.");
cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;
return 0;
}
@@ -813,7 +859,7 @@ static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)
{
ZSTD_localDict* const dl = &cctx->localDict;
ZSTD_compressionParameters const cParams = ZSTD_getCParamsFromCCtxParams(
- &cctx->requestedParams, 0, dl->dictSize);
+ &cctx->requestedParams, ZSTD_CONTENTSIZE_UNKNOWN, dl->dictSize);
if (dl->dict == NULL) {
/* No local dictionary. */
assert(dl->dictBuffer == NULL);
@@ -837,7 +883,7 @@ static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)
dl->dictContentType,
cParams,
cctx->customMem);
- RETURN_ERROR_IF(!dl->cdict, memory_allocation);
+ RETURN_ERROR_IF(!dl->cdict, memory_allocation, "ZSTD_createCDict_advanced failed");
cctx->cdict = dl->cdict;
return 0;
}
@@ -846,7 +892,8 @@ size_t ZSTD_CCtx_loadDictionary_advanced(
ZSTD_CCtx* cctx, const void* dict, size_t dictSize,
ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType)
{
- RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong);
+ RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
+ "Can't load a dictionary when ctx is not in init stage.");
RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
"no malloc for static CCtx");
DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize);
@@ -857,7 +904,7 @@ size_t ZSTD_CCtx_loadDictionary_advanced(
cctx->localDict.dict = dict;
} else {
void* dictBuffer = ZSTD_malloc(dictSize, cctx->customMem);
- RETURN_ERROR_IF(!dictBuffer, memory_allocation);
+ RETURN_ERROR_IF(!dictBuffer, memory_allocation, "NULL pointer!");
memcpy(dictBuffer, dict, dictSize);
cctx->localDict.dictBuffer = dictBuffer;
cctx->localDict.dict = dictBuffer;
@@ -883,7 +930,8 @@ ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, s
size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
{
- RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong);
+ RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
+ "Can't ref a dict when ctx not in init stage.");
/* Free the existing local cdict (if any) to save memory. */
ZSTD_clearAllDicts(cctx);
cctx->cdict = cdict;
@@ -898,11 +946,14 @@ size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSiz
size_t ZSTD_CCtx_refPrefix_advanced(
ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType)
{
- RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong);
+ RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
+ "Can't ref a prefix when ctx not in init stage.");
ZSTD_clearAllDicts(cctx);
- cctx->prefixDict.dict = prefix;
- cctx->prefixDict.dictSize = prefixSize;
- cctx->prefixDict.dictContentType = dictContentType;
+ if (prefix != NULL && prefixSize > 0) {
+ cctx->prefixDict.dict = prefix;
+ cctx->prefixDict.dictSize = prefixSize;
+ cctx->prefixDict.dictContentType = dictContentType;
+ }
return 0;
}
@@ -917,7 +968,8 @@ size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset)
}
if ( (reset == ZSTD_reset_parameters)
|| (reset == ZSTD_reset_session_and_parameters) ) {
- RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong);
+ RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
+ "Can't reset parameters only when not in init stage.");
ZSTD_clearAllDicts(cctx);
return ZSTD_CCtxParams_reset(&cctx->requestedParams);
}
@@ -930,12 +982,12 @@ size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset)
@return : 0, or an error code if one value is beyond authorized range */
size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
{
- BOUNDCHECK(ZSTD_c_windowLog, cParams.windowLog);
- BOUNDCHECK(ZSTD_c_chainLog, cParams.chainLog);
- BOUNDCHECK(ZSTD_c_hashLog, cParams.hashLog);
- BOUNDCHECK(ZSTD_c_searchLog, cParams.searchLog);
- BOUNDCHECK(ZSTD_c_minMatch, cParams.minMatch);
- BOUNDCHECK(ZSTD_c_targetLength,cParams.targetLength);
+ BOUNDCHECK(ZSTD_c_windowLog, (int)cParams.windowLog);
+ BOUNDCHECK(ZSTD_c_chainLog, (int)cParams.chainLog);
+ BOUNDCHECK(ZSTD_c_hashLog, (int)cParams.hashLog);
+ BOUNDCHECK(ZSTD_c_searchLog, (int)cParams.searchLog);
+ BOUNDCHECK(ZSTD_c_minMatch, (int)cParams.minMatch);
+ BOUNDCHECK(ZSTD_c_targetLength,(int)cParams.targetLength);
BOUNDCHECK(ZSTD_c_strategy, cParams.strategy);
return 0;
}
@@ -951,7 +1003,7 @@ ZSTD_clampCParams(ZSTD_compressionParameters cParams)
if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound; \
else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \
}
-# define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, int)
+# define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned)
CLAMP(ZSTD_c_windowLog, cParams.windowLog);
CLAMP(ZSTD_c_chainLog, cParams.chainLog);
CLAMP(ZSTD_c_hashLog, cParams.hashLog);
@@ -964,7 +1016,7 @@ ZSTD_clampCParams(ZSTD_compressionParameters cParams)
/** ZSTD_cycleLog() :
* condition for correct operation : hashLog > 1 */
-static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
+U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
{
U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2);
return hashLog - btScale;
@@ -974,7 +1026,7 @@ static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
* optimize `cPar` for a specified input (`srcSize` and `dictSize`).
* mostly downsize to reduce memory consumption and initialization latency.
* `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known.
- * note : for the time being, `srcSize==0` means "unknown" too, for compatibility with older convention.
+ * note : `srcSize==0` means 0!
* condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). */
static ZSTD_compressionParameters
ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
@@ -985,10 +1037,8 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
static const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);
assert(ZSTD_checkCParams(cPar)==0);
- if (dictSize && (srcSize+1<2) /* ZSTD_CONTENTSIZE_UNKNOWN and 0 mean "unknown" */ )
- srcSize = minSrcSize; /* presumed small when there is a dictionary */
- else if (srcSize == 0)
- srcSize = ZSTD_CONTENTSIZE_UNKNOWN; /* 0 == unknown : presumed large */
+ if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN)
+ srcSize = minSrcSize;
/* resize windowLog if input is small enough, to use less memory */
if ( (srcSize < maxWindowResize)
@@ -1017,13 +1067,21 @@ ZSTD_adjustCParams(ZSTD_compressionParameters cPar,
size_t dictSize)
{
cPar = ZSTD_clampCParams(cPar); /* resulting cPar is necessarily valid (all parameters within range) */
+ if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN;
return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize);
}
+static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize);
+static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize);
+
ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize)
{
- ZSTD_compressionParameters cParams = ZSTD_getCParams(CCtxParams->compressionLevel, srcSizeHint, dictSize);
+ ZSTD_compressionParameters cParams;
+ if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) {
+ srcSizeHint = CCtxParams->srcSizeHint;
+ }
+ cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize);
if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG;
if (CCtxParams->cParams.windowLog) cParams.windowLog = CCtxParams->cParams.windowLog;
if (CCtxParams->cParams.hashLog) cParams.hashLog = CCtxParams->cParams.hashLog;
@@ -1033,6 +1091,7 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
if (CCtxParams->cParams.targetLength) cParams.targetLength = CCtxParams->cParams.targetLength;
if (CCtxParams->cParams.strategy) cParams.strategy = CCtxParams->cParams.strategy;
assert(!ZSTD_checkCParams(cParams));
+ /* srcSizeHint == 0 means 0 */
return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize);
}
@@ -1043,10 +1102,19 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
size_t const hSize = ((size_t)1) << cParams->hashLog;
U32 const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
- size_t const h3Size = ((size_t)1) << hashLog3;
- size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
- size_t const optPotentialSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits)) * sizeof(U32)
- + (ZSTD_OPT_NUM+1) * (sizeof(ZSTD_match_t)+sizeof(ZSTD_optimal_t));
+ size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
+ /* We don't use ZSTD_cwksp_alloc_size() here because the tables aren't
+ * surrounded by redzones in ASAN. */
+ size_t const tableSpace = chainSize * sizeof(U32)
+ + hSize * sizeof(U32)
+ + h3Size * sizeof(U32);
+ size_t const optPotentialSpace =
+ ZSTD_cwksp_alloc_size((MaxML+1) * sizeof(U32))
+ + ZSTD_cwksp_alloc_size((MaxLL+1) * sizeof(U32))
+ + ZSTD_cwksp_alloc_size((MaxOff+1) * sizeof(U32))
+ + ZSTD_cwksp_alloc_size((1<<Litbits) * sizeof(U32))
+ + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))
+ + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt))
? optPotentialSpace
: 0;
@@ -1059,24 +1127,40 @@ size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)
{
RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
{ ZSTD_compressionParameters const cParams =
- ZSTD_getCParamsFromCCtxParams(params, 0, 0);
+ ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0);
size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
U32 const divider = (cParams.minMatch==3) ? 3 : 4;
size_t const maxNbSeq = blockSize / divider;
- size_t const tokenSpace = WILDCOPY_OVERLENGTH + blockSize + 11*maxNbSeq;
- size_t const entropySpace = HUF_WORKSPACE_SIZE;
- size_t const blockStateSpace = 2 * sizeof(ZSTD_compressedBlockState_t);
+ size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
+ + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef))
+ + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
+ size_t const entropySpace = ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE);
+ size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t));
size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 1);
size_t const ldmSpace = ZSTD_ldm_getTableSize(params->ldmParams);
- size_t const ldmSeqSpace = ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize) * sizeof(rawSeq);
-
- size_t const neededSpace = entropySpace + blockStateSpace + tokenSpace +
- matchStateSize + ldmSpace + ldmSeqSpace;
-
- DEBUGLOG(5, "sizeof(ZSTD_CCtx) : %u", (U32)sizeof(ZSTD_CCtx));
- DEBUGLOG(5, "estimate workSpace : %u", (U32)neededSpace);
- return sizeof(ZSTD_CCtx) + neededSpace;
+ size_t const ldmSeqSpace = ZSTD_cwksp_alloc_size(ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize) * sizeof(rawSeq));
+
+ /* estimateCCtxSize is for one-shot compression. So no buffers should
+ * be needed. However, we still allocate two 0-sized buffers, which can
+ * take space under ASAN. */
+ size_t const bufferSpace = ZSTD_cwksp_alloc_size(0)
+ + ZSTD_cwksp_alloc_size(0);
+
+ size_t const cctxSpace = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx));
+
+ size_t const neededSpace =
+ cctxSpace +
+ entropySpace +
+ blockStateSpace +
+ ldmSpace +
+ ldmSeqSpace +
+ matchStateSize +
+ tokenSpace +
+ bufferSpace;
+
+ DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace);
+ return neededSpace;
}
}
@@ -1088,7 +1172,7 @@ size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel)
{
- ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, 0);
+ ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0);
return ZSTD_estimateCCtxSize_usingCParams(cParams);
}
@@ -1107,12 +1191,13 @@ size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
{
RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
{ ZSTD_compressionParameters const cParams =
- ZSTD_getCParamsFromCCtxParams(params, 0, 0);
+ ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0);
size_t const CCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(params);
size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
size_t const inBuffSize = ((size_t)1 << cParams.windowLog) + blockSize;
size_t const outBuffSize = ZSTD_compressBound(blockSize) + 1;
- size_t const streamingSize = inBuffSize + outBuffSize;
+ size_t const streamingSize = ZSTD_cwksp_alloc_size(inBuffSize)
+ + ZSTD_cwksp_alloc_size(outBuffSize);
return CCtxSize + streamingSize;
}
@@ -1126,7 +1211,7 @@ size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams)
static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel)
{
- ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, 0);
+ ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0);
return ZSTD_estimateCStreamSize_usingCParams(cParams);
}
@@ -1180,17 +1265,6 @@ size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx)
return 0; /* over-simplification; could also check if context is currently running in streaming mode, and in which case, report how many bytes are left to be flushed within output buffer */
}
-
-
-static U32 ZSTD_equivalentCParams(ZSTD_compressionParameters cParams1,
- ZSTD_compressionParameters cParams2)
-{
- return (cParams1.hashLog == cParams2.hashLog)
- & (cParams1.chainLog == cParams2.chainLog)
- & (cParams1.strategy == cParams2.strategy) /* opt parser space */
- & ((cParams1.minMatch==3) == (cParams2.minMatch==3)); /* hashlog3 space */
-}
-
static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1,
ZSTD_compressionParameters cParams2)
{
@@ -1205,72 +1279,7 @@ static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1,
assert(cParams1.strategy == cParams2.strategy);
}
-/** The parameters are equivalent if ldm is not enabled in both sets or
- * all the parameters are equivalent. */
-static U32 ZSTD_equivalentLdmParams(ldmParams_t ldmParams1,
- ldmParams_t ldmParams2)
-{
- return (!ldmParams1.enableLdm && !ldmParams2.enableLdm) ||
- (ldmParams1.enableLdm == ldmParams2.enableLdm &&
- ldmParams1.hashLog == ldmParams2.hashLog &&
- ldmParams1.bucketSizeLog == ldmParams2.bucketSizeLog &&
- ldmParams1.minMatchLength == ldmParams2.minMatchLength &&
- ldmParams1.hashRateLog == ldmParams2.hashRateLog);
-}
-
-typedef enum { ZSTDb_not_buffered, ZSTDb_buffered } ZSTD_buffered_policy_e;
-
-/* ZSTD_sufficientBuff() :
- * check internal buffers exist for streaming if buffPol == ZSTDb_buffered .
- * Note : they are assumed to be correctly sized if ZSTD_equivalentCParams()==1 */
-static U32 ZSTD_sufficientBuff(size_t bufferSize1, size_t maxNbSeq1,
- size_t maxNbLit1,
- ZSTD_buffered_policy_e buffPol2,
- ZSTD_compressionParameters cParams2,
- U64 pledgedSrcSize)
-{
- size_t const windowSize2 = MAX(1, (size_t)MIN(((U64)1 << cParams2.windowLog), pledgedSrcSize));
- size_t const blockSize2 = MIN(ZSTD_BLOCKSIZE_MAX, windowSize2);
- size_t const maxNbSeq2 = blockSize2 / ((cParams2.minMatch == 3) ? 3 : 4);
- size_t const maxNbLit2 = blockSize2;
- size_t const neededBufferSize2 = (buffPol2==ZSTDb_buffered) ? windowSize2 + blockSize2 : 0;
- DEBUGLOG(4, "ZSTD_sufficientBuff: is neededBufferSize2=%u <= bufferSize1=%u",
- (U32)neededBufferSize2, (U32)bufferSize1);
- DEBUGLOG(4, "ZSTD_sufficientBuff: is maxNbSeq2=%u <= maxNbSeq1=%u",
- (U32)maxNbSeq2, (U32)maxNbSeq1);
- DEBUGLOG(4, "ZSTD_sufficientBuff: is maxNbLit2=%u <= maxNbLit1=%u",
- (U32)maxNbLit2, (U32)maxNbLit1);
- return (maxNbLit2 <= maxNbLit1)
- & (maxNbSeq2 <= maxNbSeq1)
- & (neededBufferSize2 <= bufferSize1);
-}
-
-/** Equivalence for resetCCtx purposes */
-static U32 ZSTD_equivalentParams(ZSTD_CCtx_params params1,
- ZSTD_CCtx_params params2,
- size_t buffSize1,
- size_t maxNbSeq1, size_t maxNbLit1,
- ZSTD_buffered_policy_e buffPol2,
- U64 pledgedSrcSize)
-{
- DEBUGLOG(4, "ZSTD_equivalentParams: pledgedSrcSize=%u", (U32)pledgedSrcSize);
- if (!ZSTD_equivalentCParams(params1.cParams, params2.cParams)) {
- DEBUGLOG(4, "ZSTD_equivalentCParams() == 0");
- return 0;
- }
- if (!ZSTD_equivalentLdmParams(params1.ldmParams, params2.ldmParams)) {
- DEBUGLOG(4, "ZSTD_equivalentLdmParams() == 0");
- return 0;
- }
- if (!ZSTD_sufficientBuff(buffSize1, maxNbSeq1, maxNbLit1, buffPol2,
- params2.cParams, pledgedSrcSize)) {
- DEBUGLOG(4, "ZSTD_sufficientBuff() == 0");
- return 0;
- }
- return 1;
-}
-
-static void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs)
+void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs)
{
int i;
for (i = 0; i < ZSTD_REP_NUM; ++i)
@@ -1282,132 +1291,143 @@ static void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs)
}
/*! ZSTD_invalidateMatchState()
- * Invalidate all the matches in the match finder tables.
- * Requires nextSrc and base to be set (can be NULL).
+ * Invalidate all the matches in the match finder tables.
+ * Requires nextSrc and base to be set (can be NULL).
*/
static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms)
{
ZSTD_window_clear(&ms->window);
ms->nextToUpdate = ms->window.dictLimit;
- ms->nextToUpdate3 = ms->window.dictLimit;
ms->loadedDictEnd = 0;
ms->opt.litLengthSum = 0; /* force reset of btopt stats */
ms->dictMatchState = NULL;
}
-/*! ZSTD_continueCCtx() :
- * reuse CCtx without reset (note : requires no dictionary) */
-static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_CCtx_params params, U64 pledgedSrcSize)
-{
- size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize));
- size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
- DEBUGLOG(4, "ZSTD_continueCCtx: re-use context in place");
+/**
+ * Indicates whether this compression proceeds directly from user-provided
+ * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or
+ * whether the context needs to buffer the input/output (ZSTDb_buffered).
+ */
+typedef enum {
+ ZSTDb_not_buffered,
+ ZSTDb_buffered
+} ZSTD_buffered_policy_e;
- cctx->blockSize = blockSize; /* previous block size could be different even for same windowLog, due to pledgedSrcSize */
- cctx->appliedParams = params;
- cctx->blockState.matchState.cParams = params.cParams;
- cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;
- cctx->consumedSrcSize = 0;
- cctx->producedCSize = 0;
- if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)
- cctx->appliedParams.fParams.contentSizeFlag = 0;
- DEBUGLOG(4, "pledged content size : %u ; flag : %u",
- (U32)pledgedSrcSize, cctx->appliedParams.fParams.contentSizeFlag);
- cctx->stage = ZSTDcs_init;
- cctx->dictID = 0;
- if (params.ldmParams.enableLdm)
- ZSTD_window_clear(&cctx->ldmState.window);
- ZSTD_referenceExternalSequences(cctx, NULL, 0);
- ZSTD_invalidateMatchState(&cctx->blockState.matchState);
- ZSTD_reset_compressedBlockState(cctx->blockState.prevCBlock);
- XXH64_reset(&cctx->xxhState, 0);
- return 0;
-}
+/**
+ * Controls, for this matchState reset, whether the tables need to be cleared /
+ * prepared for the coming compression (ZSTDcrp_makeClean), or whether the
+ * tables can be left unclean (ZSTDcrp_leaveDirty), because we know that a
+ * subsequent operation will overwrite the table space anyways (e.g., copying
+ * the matchState contents in from a CDict).
+ */
+typedef enum {
+ ZSTDcrp_makeClean,
+ ZSTDcrp_leaveDirty
+} ZSTD_compResetPolicy_e;
-typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset } ZSTD_compResetPolicy_e;
+/**
+ * Controls, for this matchState reset, whether indexing can continue where it
+ * left off (ZSTDirp_continue), or whether it needs to be restarted from zero
+ * (ZSTDirp_reset).
+ */
+typedef enum {
+ ZSTDirp_continue,
+ ZSTDirp_reset
+} ZSTD_indexResetPolicy_e;
+
+typedef enum {
+ ZSTD_resetTarget_CDict,
+ ZSTD_resetTarget_CCtx
+} ZSTD_resetTarget_e;
-static void*
+static size_t
ZSTD_reset_matchState(ZSTD_matchState_t* ms,
- void* ptr,
+ ZSTD_cwksp* ws,
const ZSTD_compressionParameters* cParams,
- ZSTD_compResetPolicy_e const crp, U32 const forCCtx)
+ const ZSTD_compResetPolicy_e crp,
+ const ZSTD_indexResetPolicy_e forceResetIndex,
+ const ZSTD_resetTarget_e forWho)
{
size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
size_t const hSize = ((size_t)1) << cParams->hashLog;
- U32 const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
- size_t const h3Size = ((size_t)1) << hashLog3;
- size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
+ U32 const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
+ size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
- assert(((size_t)ptr & 3) == 0);
+ DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset);
+ if (forceResetIndex == ZSTDirp_reset) {
+ ZSTD_window_init(&ms->window);
+ ZSTD_cwksp_mark_tables_dirty(ws);
+ }
ms->hashLog3 = hashLog3;
- memset(&ms->window, 0, sizeof(ms->window));
- ms->window.dictLimit = 1; /* start from 1, so that 1st position is valid */
- ms->window.lowLimit = 1; /* it ensures first and later CCtx usages compress the same */
- ms->window.nextSrc = ms->window.base + 1; /* see issue #1241 */
+
ZSTD_invalidateMatchState(ms);
+ assert(!ZSTD_cwksp_reserve_failed(ws)); /* check that allocation hasn't already failed */
+
+ ZSTD_cwksp_clear_tables(ws);
+
+ DEBUGLOG(5, "reserving table space");
+ /* table Space */
+ ms->hashTable = (U32*)ZSTD_cwksp_reserve_table(ws, hSize * sizeof(U32));
+ ms->chainTable = (U32*)ZSTD_cwksp_reserve_table(ws, chainSize * sizeof(U32));
+ ms->hashTable3 = (U32*)ZSTD_cwksp_reserve_table(ws, h3Size * sizeof(U32));
+ RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
+ "failed a workspace allocation in ZSTD_reset_matchState");
+
+ DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_leaveDirty);
+ if (crp!=ZSTDcrp_leaveDirty) {
+ /* reset tables only */
+ ZSTD_cwksp_clean_tables(ws);
+ }
+
/* opt parser space */
- if (forCCtx && (cParams->strategy >= ZSTD_btopt)) {
+ if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) {
DEBUGLOG(4, "reserving optimal parser space");
- ms->opt.litFreq = (unsigned*)ptr;
- ms->opt.litLengthFreq = ms->opt.litFreq + (1<<Litbits);
- ms->opt.matchLengthFreq = ms->opt.litLengthFreq + (MaxLL+1);
- ms->opt.offCodeFreq = ms->opt.matchLengthFreq + (MaxML+1);
- ptr = ms->opt.offCodeFreq + (MaxOff+1);
- ms->opt.matchTable = (ZSTD_match_t*)ptr;
- ptr = ms->opt.matchTable + ZSTD_OPT_NUM+1;
- ms->opt.priceTable = (ZSTD_optimal_t*)ptr;
- ptr = ms->opt.priceTable + ZSTD_OPT_NUM+1;
+ ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (1<<Litbits) * sizeof(unsigned));
+ ms->opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned));
+ ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned));
+ ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned));
+ ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t));
+ ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
}
- /* table Space */
- DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_noMemset);
- assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */
- if (crp!=ZSTDcrp_noMemset) memset(ptr, 0, tableSpace); /* reset tables only */
- ms->hashTable = (U32*)(ptr);
- ms->chainTable = ms->hashTable + hSize;
- ms->hashTable3 = ms->chainTable + chainSize;
- ptr = ms->hashTable3 + h3Size;
-
ms->cParams = *cParams;
- assert(((size_t)ptr & 3) == 0);
- return ptr;
+ RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
+ "failed a workspace allocation in ZSTD_reset_matchState");
+
+ return 0;
}
-#define ZSTD_WORKSPACETOOLARGE_FACTOR 3 /* define "workspace is too large" as this number of times larger than needed */
-#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128 /* when workspace is continuously too large
- * during at least this number of times,
- * context's memory usage is considered wasteful,
- * because it's sized to handle a worst case scenario which rarely happens.
- * In which case, resize it down to free some memory */
+/* ZSTD_indexTooCloseToMax() :
+ * minor optimization : prefer memset() rather than reduceIndex()
+ * which is measurably slow in some circumstances (reported for Visual Studio).
+ * Works when re-using a context for a lot of smallish inputs :
+ * if all inputs are smaller than ZSTD_INDEXOVERFLOW_MARGIN,
+ * memset() will be triggered before reduceIndex().
+ */
+#define ZSTD_INDEXOVERFLOW_MARGIN (16 MB)
+static int ZSTD_indexTooCloseToMax(ZSTD_window_t w)
+{
+ return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN);
+}
/*! ZSTD_resetCCtx_internal() :
note : `params` are assumed fully validated at this stage */
static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
ZSTD_CCtx_params params,
- U64 pledgedSrcSize,
+ U64 const pledgedSrcSize,
ZSTD_compResetPolicy_e const crp,
ZSTD_buffered_policy_e const zbuff)
{
+ ZSTD_cwksp* const ws = &zc->workspace;
DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u",
(U32)pledgedSrcSize, params.cParams.windowLog);
assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
- if (crp == ZSTDcrp_continue) {
- if (ZSTD_equivalentParams(zc->appliedParams, params,
- zc->inBuffSize,
- zc->seqStore.maxNbSeq, zc->seqStore.maxNbLit,
- zbuff, pledgedSrcSize)) {
- DEBUGLOG(4, "ZSTD_equivalentParams()==1 -> continue mode (wLog1=%u, blockSize1=%zu)",
- zc->appliedParams.cParams.windowLog, zc->blockSize);
- zc->workSpaceOversizedDuration += (zc->workSpaceOversizedDuration > 0); /* if it was too large, it still is */
- if (zc->workSpaceOversizedDuration <= ZSTD_WORKSPACETOOLARGE_MAXDURATION)
- return ZSTD_continueCCtx(zc, params, pledgedSrcSize);
- } }
- DEBUGLOG(4, "ZSTD_equivalentParams()==0 -> reset CCtx");
+ zc->isFirstBlock = 1;
if (params.ldmParams.enableLdm) {
/* Adjust long distance matching parameters */
@@ -1421,58 +1441,74 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
U32 const divider = (params.cParams.minMatch==3) ? 3 : 4;
size_t const maxNbSeq = blockSize / divider;
- size_t const tokenSpace = WILDCOPY_OVERLENGTH + blockSize + 11*maxNbSeq;
+ size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
+ + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef))
+ + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
size_t const buffOutSize = (zbuff==ZSTDb_buffered) ? ZSTD_compressBound(blockSize)+1 : 0;
size_t const buffInSize = (zbuff==ZSTDb_buffered) ? windowSize + blockSize : 0;
size_t const matchStateSize = ZSTD_sizeof_matchState(&params.cParams, /* forCCtx */ 1);
size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize);
- void* ptr; /* used to partition workSpace */
- /* Check if workSpace is large enough, alloc a new one if needed */
- { size_t const entropySpace = HUF_WORKSPACE_SIZE;
- size_t const blockStateSpace = 2 * sizeof(ZSTD_compressedBlockState_t);
- size_t const bufferSpace = buffInSize + buffOutSize;
+ ZSTD_indexResetPolicy_e needsIndexReset = zc->initialized ? ZSTDirp_continue : ZSTDirp_reset;
+
+ if (ZSTD_indexTooCloseToMax(zc->blockState.matchState.window)) {
+ needsIndexReset = ZSTDirp_reset;
+ }
+
+ if (!zc->staticSize) ZSTD_cwksp_bump_oversized_duration(ws, 0);
+
+ /* Check if workspace is large enough, alloc a new one if needed */
+ { size_t const cctxSpace = zc->staticSize ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0;
+ size_t const entropySpace = ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE);
+ size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t));
+ size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize) + ZSTD_cwksp_alloc_size(buffOutSize);
size_t const ldmSpace = ZSTD_ldm_getTableSize(params.ldmParams);
- size_t const ldmSeqSpace = maxNbLdmSeq * sizeof(rawSeq);
+ size_t const ldmSeqSpace = ZSTD_cwksp_alloc_size(maxNbLdmSeq * sizeof(rawSeq));
- size_t const neededSpace = entropySpace + blockStateSpace + ldmSpace +
- ldmSeqSpace + matchStateSize + tokenSpace +
- bufferSpace;
+ size_t const neededSpace =
+ cctxSpace +
+ entropySpace +
+ blockStateSpace +
+ ldmSpace +
+ ldmSeqSpace +
+ matchStateSize +
+ tokenSpace +
+ bufferSpace;
- int const workSpaceTooSmall = zc->workSpaceSize < neededSpace;
- int const workSpaceTooLarge = zc->workSpaceSize > ZSTD_WORKSPACETOOLARGE_FACTOR * neededSpace;
- int const workSpaceWasteful = workSpaceTooLarge && (zc->workSpaceOversizedDuration > ZSTD_WORKSPACETOOLARGE_MAXDURATION);
- zc->workSpaceOversizedDuration = workSpaceTooLarge ? zc->workSpaceOversizedDuration+1 : 0;
+ int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace;
+ int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace);
DEBUGLOG(4, "Need %zuKB workspace, including %zuKB for match state, and %zuKB for buffers",
neededSpace>>10, matchStateSize>>10, bufferSpace>>10);
DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);
- if (workSpaceTooSmall || workSpaceWasteful) {
- DEBUGLOG(4, "Need to resize workSpaceSize from %zuKB to %zuKB",
- zc->workSpaceSize >> 10,
+ if (workspaceTooSmall || workspaceWasteful) {
+ DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB",
+ ZSTD_cwksp_sizeof(ws) >> 10,
neededSpace >> 10);
RETURN_ERROR_IF(zc->staticSize, memory_allocation, "static cctx : no resize");
- zc->workSpaceSize = 0;
- ZSTD_free(zc->workSpace, zc->customMem);
- zc->workSpace = ZSTD_malloc(neededSpace, zc->customMem);
- RETURN_ERROR_IF(zc->workSpace == NULL, memory_allocation);
- zc->workSpaceSize = neededSpace;
- zc->workSpaceOversizedDuration = 0;
+ needsIndexReset = ZSTDirp_reset;
+ ZSTD_cwksp_free(ws, zc->customMem);
+ FORWARD_IF_ERROR(ZSTD_cwksp_create(ws, neededSpace, zc->customMem), "");
+
+ DEBUGLOG(5, "reserving object space");
/* Statically sized space.
* entropyWorkspace never moves,
* though prev/next block swap places */
- assert(((size_t)zc->workSpace & 3) == 0); /* ensure correct alignment */
- assert(zc->workSpaceSize >= 2 * sizeof(ZSTD_compressedBlockState_t));
- zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)zc->workSpace;
- zc->blockState.nextCBlock = zc->blockState.prevCBlock + 1;
- ptr = zc->blockState.nextCBlock + 1;
- zc->entropyWorkspace = (U32*)ptr;
+ assert(ZSTD_cwksp_check_available(ws, 2 * sizeof(ZSTD_compressedBlockState_t)));
+ zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
+ RETURN_ERROR_IF(zc->blockState.prevCBlock == NULL, memory_allocation, "couldn't allocate prevCBlock");
+ zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
+ RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock");
+ zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, HUF_WORKSPACE_SIZE);
+ RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate entropyWorkspace");
} }
+ ZSTD_cwksp_clear(ws);
+
/* init params */
zc->appliedParams = params;
zc->blockState.matchState.cParams = params.cParams;
@@ -1491,57 +1527,60 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock);
- ptr = zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32;
-
- /* ldm hash table */
- /* initialize bucketOffsets table later for pointer alignment */
- if (params.ldmParams.enableLdm) {
- size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog;
- memset(ptr, 0, ldmHSize * sizeof(ldmEntry_t));
- assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */
- zc->ldmState.hashTable = (ldmEntry_t*)ptr;
- ptr = zc->ldmState.hashTable + ldmHSize;
- zc->ldmSequences = (rawSeq*)ptr;
- ptr = zc->ldmSequences + maxNbLdmSeq;
- zc->maxNbLdmSequences = maxNbLdmSeq;
-
- memset(&zc->ldmState.window, 0, sizeof(zc->ldmState.window));
- }
- assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */
-
- ptr = ZSTD_reset_matchState(&zc->blockState.matchState, ptr, &params.cParams, crp, /* forCCtx */ 1);
-
- /* sequences storage */
- zc->seqStore.maxNbSeq = maxNbSeq;
- zc->seqStore.sequencesStart = (seqDef*)ptr;
- ptr = zc->seqStore.sequencesStart + maxNbSeq;
- zc->seqStore.llCode = (BYTE*) ptr;
- zc->seqStore.mlCode = zc->seqStore.llCode + maxNbSeq;
- zc->seqStore.ofCode = zc->seqStore.mlCode + maxNbSeq;
- zc->seqStore.litStart = zc->seqStore.ofCode + maxNbSeq;
/* ZSTD_wildcopy() is used to copy into the literals buffer,
* so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes.
*/
+ zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH);
zc->seqStore.maxNbLit = blockSize;
- ptr = zc->seqStore.litStart + blockSize + WILDCOPY_OVERLENGTH;
+
+ /* buffers */
+ zc->inBuffSize = buffInSize;
+ zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize);
+ zc->outBuffSize = buffOutSize;
+ zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize);
/* ldm bucketOffsets table */
if (params.ldmParams.enableLdm) {
+ /* TODO: avoid memset? */
size_t const ldmBucketSize =
((size_t)1) << (params.ldmParams.hashLog -
params.ldmParams.bucketSizeLog);
- memset(ptr, 0, ldmBucketSize);
- zc->ldmState.bucketOffsets = (BYTE*)ptr;
- ptr = zc->ldmState.bucketOffsets + ldmBucketSize;
- ZSTD_window_clear(&zc->ldmState.window);
+ zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, ldmBucketSize);
+ memset(zc->ldmState.bucketOffsets, 0, ldmBucketSize);
}
+
+ /* sequences storage */
ZSTD_referenceExternalSequences(zc, NULL, 0);
+ zc->seqStore.maxNbSeq = maxNbSeq;
+ zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
+ zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
+ zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
+ zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef));
+
+ FORWARD_IF_ERROR(ZSTD_reset_matchState(
+ &zc->blockState.matchState,
+ ws,
+ &params.cParams,
+ crp,
+ needsIndexReset,
+ ZSTD_resetTarget_CCtx), "");
- /* buffers */
- zc->inBuffSize = buffInSize;
- zc->inBuff = (char*)ptr;
- zc->outBuffSize = buffOutSize;
- zc->outBuff = zc->inBuff + buffInSize;
+ /* ldm hash table */
+ if (params.ldmParams.enableLdm) {
+ /* TODO: avoid memset? */
+ size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog;
+ zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t));
+ memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t));
+ zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq));
+ zc->maxNbLdmSequences = maxNbLdmSeq;
+
+ ZSTD_window_init(&zc->ldmState.window);
+ ZSTD_window_clear(&zc->ldmState.window);
+ zc->ldmState.loadedDictEnd = 0;
+ }
+
+ DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));
+ zc->initialized = 1;
return 0;
}
@@ -1575,40 +1614,39 @@ static const size_t attachDictSizeCutoffs[ZSTD_STRATEGY_MAX+1] = {
};
static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict,
- ZSTD_CCtx_params params,
+ const ZSTD_CCtx_params* params,
U64 pledgedSrcSize)
{
size_t cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy];
return ( pledgedSrcSize <= cutoff
|| pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
- || params.attachDictPref == ZSTD_dictForceAttach )
- && params.attachDictPref != ZSTD_dictForceCopy
- && !params.forceWindow; /* dictMatchState isn't correctly
+ || params->attachDictPref == ZSTD_dictForceAttach )
+ && params->attachDictPref != ZSTD_dictForceCopy
+ && !params->forceWindow; /* dictMatchState isn't correctly
* handled in _enforceMaxDist */
}
-static size_t ZSTD_resetCCtx_byAttachingCDict(
- ZSTD_CCtx* cctx,
- const ZSTD_CDict* cdict,
- ZSTD_CCtx_params params,
- U64 pledgedSrcSize,
- ZSTD_buffered_policy_e zbuff)
+static size_t
+ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
+ const ZSTD_CDict* cdict,
+ ZSTD_CCtx_params params,
+ U64 pledgedSrcSize,
+ ZSTD_buffered_policy_e zbuff)
{
- {
- const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams;
+ { const ZSTD_compressionParameters* const cdict_cParams = &cdict->matchState.cParams;
unsigned const windowLog = params.cParams.windowLog;
assert(windowLog != 0);
/* Resize working context table params for input only, since the dict
* has its own tables. */
+ /* pledgeSrcSize == 0 means 0! */
params.cParams = ZSTD_adjustCParams_internal(*cdict_cParams, pledgedSrcSize, 0);
params.cParams.windowLog = windowLog;
- ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
- ZSTDcrp_continue, zbuff);
+ FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
+ ZSTDcrp_makeClean, zbuff), "");
assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
}
- {
- const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc
+ { const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc
- cdict->matchState.window.base);
const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit;
if (cdictLen == 0) {
@@ -1625,9 +1663,9 @@ static size_t ZSTD_resetCCtx_byAttachingCDict(
cctx->blockState.matchState.window.base + cdictEnd;
ZSTD_window_clear(&cctx->blockState.matchState.window);
}
+ /* loadedDictEnd is expressed within the referential of the active context */
cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit;
- }
- }
+ } }
cctx->dictID = cdict->dictID;
@@ -1652,36 +1690,41 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
/* Copy only compression parameters related to tables. */
params.cParams = *cdict_cParams;
params.cParams.windowLog = windowLog;
- ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
- ZSTDcrp_noMemset, zbuff);
+ FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
+ ZSTDcrp_leaveDirty, zbuff), "");
assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog);
assert(cctx->appliedParams.cParams.chainLog == cdict_cParams->chainLog);
}
+ ZSTD_cwksp_mark_tables_dirty(&cctx->workspace);
+
/* copy tables */
{ size_t const chainSize = (cdict_cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict_cParams->chainLog);
size_t const hSize = (size_t)1 << cdict_cParams->hashLog;
- size_t const tableSpace = (chainSize + hSize) * sizeof(U32);
- assert((U32*)cctx->blockState.matchState.chainTable == (U32*)cctx->blockState.matchState.hashTable + hSize); /* chainTable must follow hashTable */
- assert((U32*)cctx->blockState.matchState.hashTable3 == (U32*)cctx->blockState.matchState.chainTable + chainSize);
- assert((U32*)cdict->matchState.chainTable == (U32*)cdict->matchState.hashTable + hSize); /* chainTable must follow hashTable */
- assert((U32*)cdict->matchState.hashTable3 == (U32*)cdict->matchState.chainTable + chainSize);
- memcpy(cctx->blockState.matchState.hashTable, cdict->matchState.hashTable, tableSpace); /* presumes all tables follow each other */
+
+ memcpy(cctx->blockState.matchState.hashTable,
+ cdict->matchState.hashTable,
+ hSize * sizeof(U32));
+ memcpy(cctx->blockState.matchState.chainTable,
+ cdict->matchState.chainTable,
+ chainSize * sizeof(U32));
}
/* Zero the hashTable3, since the cdict never fills it */
- { size_t const h3Size = (size_t)1 << cctx->blockState.matchState.hashLog3;
+ { int const h3log = cctx->blockState.matchState.hashLog3;
+ size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;
assert(cdict->matchState.hashLog3 == 0);
memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32));
}
+ ZSTD_cwksp_mark_tables_clean(&cctx->workspace);
+
/* copy dictionary offsets */
{ ZSTD_matchState_t const* srcMatchState = &cdict->matchState;
ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState;
dstMatchState->window = srcMatchState->window;
dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
- dstMatchState->nextToUpdate3= srcMatchState->nextToUpdate3;
dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
}
@@ -1698,7 +1741,7 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
* in-place. We decide here which strategy to use. */
static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx,
const ZSTD_CDict* cdict,
- ZSTD_CCtx_params params,
+ const ZSTD_CCtx_params* params,
U64 pledgedSrcSize,
ZSTD_buffered_policy_e zbuff)
{
@@ -1708,10 +1751,10 @@ static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx,
if (ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) {
return ZSTD_resetCCtx_byAttachingCDict(
- cctx, cdict, params, pledgedSrcSize, zbuff);
+ cctx, cdict, *params, pledgedSrcSize, zbuff);
} else {
return ZSTD_resetCCtx_byCopyingCDict(
- cctx, cdict, params, pledgedSrcSize, zbuff);
+ cctx, cdict, *params, pledgedSrcSize, zbuff);
}
}
@@ -1729,7 +1772,8 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
ZSTD_buffered_policy_e zbuff)
{
DEBUGLOG(5, "ZSTD_copyCCtx_internal");
- RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong);
+ RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong,
+ "Can't copy a ctx that's not in init stage.");
memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));
{ ZSTD_CCtx_params params = dstCCtx->requestedParams;
@@ -1737,7 +1781,7 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
params.cParams = srcCCtx->appliedParams.cParams;
params.fParams = fParams;
ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize,
- ZSTDcrp_noMemset, zbuff);
+ ZSTDcrp_leaveDirty, zbuff);
assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog);
assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy);
assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog);
@@ -1745,23 +1789,33 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3);
}
+ ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace);
+
/* copy tables */
{ size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog);
size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog;
- size_t const h3Size = (size_t)1 << srcCCtx->blockState.matchState.hashLog3;
- size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
- assert((U32*)dstCCtx->blockState.matchState.chainTable == (U32*)dstCCtx->blockState.matchState.hashTable + hSize); /* chainTable must follow hashTable */
- assert((U32*)dstCCtx->blockState.matchState.hashTable3 == (U32*)dstCCtx->blockState.matchState.chainTable + chainSize);
- memcpy(dstCCtx->blockState.matchState.hashTable, srcCCtx->blockState.matchState.hashTable, tableSpace); /* presumes all tables follow each other */
+ int const h3log = srcCCtx->blockState.matchState.hashLog3;
+ size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;
+
+ memcpy(dstCCtx->blockState.matchState.hashTable,
+ srcCCtx->blockState.matchState.hashTable,
+ hSize * sizeof(U32));
+ memcpy(dstCCtx->blockState.matchState.chainTable,
+ srcCCtx->blockState.matchState.chainTable,
+ chainSize * sizeof(U32));
+ memcpy(dstCCtx->blockState.matchState.hashTable3,
+ srcCCtx->blockState.matchState.hashTable3,
+ h3Size * sizeof(U32));
}
+ ZSTD_cwksp_mark_tables_clean(&dstCCtx->workspace);
+
/* copy dictionary offsets */
{
const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState;
ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState;
dstMatchState->window = srcMatchState->window;
dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
- dstMatchState->nextToUpdate3= srcMatchState->nextToUpdate3;
dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
}
dstCCtx->dictID = srcCCtx->dictID;
@@ -1806,6 +1860,20 @@ ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerVa
int rowNb;
assert((size & (ZSTD_ROWSIZE-1)) == 0); /* multiple of ZSTD_ROWSIZE */
assert(size < (1U<<31)); /* can be casted to int */
+
+#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
+ /* To validate that the table re-use logic is sound, and that we don't
+ * access table space that we haven't cleaned, we re-"poison" the table
+ * space every time we mark it dirty.
+ *
+ * This function however is intended to operate on those dirty tables and
+ * re-clean them. So when this function is used correctly, we can unpoison
+ * the memory it operated on. This introduces a blind spot though, since
+ * if we now try to operate on __actually__ poisoned memory, we will not
+ * detect that. */
+ __msan_unpoison(table, size * sizeof(U32));
+#endif
+
for (rowNb=0 ; rowNb < nbRows ; rowNb++) {
int column;
for (column=0; column<ZSTD_ROWSIZE; column++) {
@@ -1831,16 +1899,15 @@ static void ZSTD_reduceTable_btlazy2(U32* const table, U32 const size, U32 const
/*! ZSTD_reduceIndex() :
* rescale all indexes to avoid future overflow (indexes are U32) */
-static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
+static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const U32 reducerValue)
{
- ZSTD_matchState_t* const ms = &zc->blockState.matchState;
- { U32 const hSize = (U32)1 << zc->appliedParams.cParams.hashLog;
+ { U32 const hSize = (U32)1 << params->cParams.hashLog;
ZSTD_reduceTable(ms->hashTable, hSize, reducerValue);
}
- if (zc->appliedParams.cParams.strategy != ZSTD_fast) {
- U32 const chainSize = (U32)1 << zc->appliedParams.cParams.chainLog;
- if (zc->appliedParams.cParams.strategy == ZSTD_btlazy2)
+ if (params->cParams.strategy != ZSTD_fast) {
+ U32 const chainSize = (U32)1 << params->cParams.chainLog;
+ if (params->cParams.strategy == ZSTD_btlazy2)
ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue);
else
ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue);
@@ -1859,165 +1926,6 @@ static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
/* See doc/zstd_compression_format.md for detailed format description */
-static size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
-{
- U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
- RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
- dstSize_tooSmall);
- MEM_writeLE24(dst, cBlockHeader24);
- memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
- return ZSTD_blockHeaderSize + srcSize;
-}
-
-static size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
-{
- BYTE* const ostart = (BYTE* const)dst;
- U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
-
- RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall);
-
- switch(flSize)
- {
- case 1: /* 2 - 1 - 5 */
- ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3));
- break;
- case 2: /* 2 - 2 - 12 */
- MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4)));
- break;
- case 3: /* 2 - 2 - 20 */
- MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4)));
- break;
- default: /* not necessary : flSize is {1,2,3} */
- assert(0);
- }
-
- memcpy(ostart + flSize, src, srcSize);
- return srcSize + flSize;
-}
-
-static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
-{
- BYTE* const ostart = (BYTE* const)dst;
- U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
-
- (void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */
-
- switch(flSize)
- {
- case 1: /* 2 - 1 - 5 */
- ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3));
- break;
- case 2: /* 2 - 2 - 12 */
- MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4)));
- break;
- case 3: /* 2 - 2 - 20 */
- MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4)));
- break;
- default: /* not necessary : flSize is {1,2,3} */
- assert(0);
- }
-
- ostart[flSize] = *(const BYTE*)src;
- return flSize+1;
-}
-
-
-/* ZSTD_minGain() :
- * minimum compression required
- * to generate a compress block or a compressed literals section.
- * note : use same formula for both situations */
-static size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
-{
- U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
- ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
- assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
- return (srcSize >> minlog) + 2;
-}
-
-static size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
- ZSTD_hufCTables_t* nextHuf,
- ZSTD_strategy strategy, int disableLiteralCompression,
- void* dst, size_t dstCapacity,
- const void* src, size_t srcSize,
- void* workspace, size_t wkspSize,
- const int bmi2)
-{
- size_t const minGain = ZSTD_minGain(srcSize, strategy);
- size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
- BYTE* const ostart = (BYTE*)dst;
- U32 singleStream = srcSize < 256;
- symbolEncodingType_e hType = set_compressed;
- size_t cLitSize;
-
- DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i)",
- disableLiteralCompression);
-
- /* Prepare nextEntropy assuming reusing the existing table */
- memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
-
- if (disableLiteralCompression)
- return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
-
- /* small ? don't even attempt compression (speed opt) */
-# define COMPRESS_LITERALS_SIZE_MIN 63
- { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
- if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
- }
-
- RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression");
- { HUF_repeat repeat = prevHuf->repeatMode;
- int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
- if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
- cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
- workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2)
- : HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
- workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
- if (repeat != HUF_repeat_none) {
- /* reused the existing table */
- hType = set_repeat;
- }
- }
-
- if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) {
- memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
- return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
- }
- if (cLitSize==1) {
- memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
- return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
- }
-
- if (hType == set_compressed) {
- /* using a newly constructed table */
- nextHuf->repeatMode = HUF_repeat_check;
- }
-
- /* Build header */
- switch(lhSize)
- {
- case 3: /* 2 - 2 - 10 - 10 */
- { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
- MEM_writeLE24(ostart, lhc);
- break;
- }
- case 4: /* 2 - 2 - 14 - 14 */
- { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
- MEM_writeLE32(ostart, lhc);
- break;
- }
- case 5: /* 2 - 2 - 18 - 18 */
- { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
- MEM_writeLE32(ostart, lhc);
- ostart[4] = (BYTE)(cLitSize >> 10);
- break;
- }
- default: /* not possible : lhSize is {3,4,5} */
- assert(0);
- }
- return lhSize+cLitSize;
-}
-
-
void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
{
const seqDef* const sequences = seqStorePtr->sequencesStart;
@@ -2040,431 +1948,14 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
}
-
-/**
- * -log2(x / 256) lookup table for x in [0, 256).
- * If x == 0: Return 0
- * Else: Return floor(-log2(x / 256) * 256)
- */
-static unsigned const kInverseProbabilityLog256[256] = {
- 0, 2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162,
- 1130, 1100, 1073, 1047, 1024, 1001, 980, 960, 941, 923, 906, 889,
- 874, 859, 844, 830, 817, 804, 791, 779, 768, 756, 745, 734,
- 724, 714, 704, 694, 685, 676, 667, 658, 650, 642, 633, 626,
- 618, 610, 603, 595, 588, 581, 574, 567, 561, 554, 548, 542,
- 535, 529, 523, 517, 512, 506, 500, 495, 489, 484, 478, 473,
- 468, 463, 458, 453, 448, 443, 438, 434, 429, 424, 420, 415,
- 411, 407, 402, 398, 394, 390, 386, 382, 377, 373, 370, 366,
- 362, 358, 354, 350, 347, 343, 339, 336, 332, 329, 325, 322,
- 318, 315, 311, 308, 305, 302, 298, 295, 292, 289, 286, 282,
- 279, 276, 273, 270, 267, 264, 261, 258, 256, 253, 250, 247,
- 244, 241, 239, 236, 233, 230, 228, 225, 222, 220, 217, 215,
- 212, 209, 207, 204, 202, 199, 197, 194, 192, 190, 187, 185,
- 182, 180, 178, 175, 173, 171, 168, 166, 164, 162, 159, 157,
- 155, 153, 151, 149, 146, 144, 142, 140, 138, 136, 134, 132,
- 130, 128, 126, 123, 121, 119, 117, 115, 114, 112, 110, 108,
- 106, 104, 102, 100, 98, 96, 94, 93, 91, 89, 87, 85,
- 83, 82, 80, 78, 76, 74, 73, 71, 69, 67, 66, 64,
- 62, 61, 59, 57, 55, 54, 52, 50, 49, 47, 46, 44,
- 42, 41, 39, 37, 36, 34, 33, 31, 30, 28, 26, 25,
- 23, 22, 20, 19, 17, 16, 14, 13, 11, 10, 8, 7,
- 5, 4, 2, 1,
-};
-
-
-/**
- * Returns the cost in bits of encoding the distribution described by count
- * using the entropy bound.
- */
-static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total)
-{
- unsigned cost = 0;
- unsigned s;
- for (s = 0; s <= max; ++s) {
- unsigned norm = (unsigned)((256 * count[s]) / total);
- if (count[s] != 0 && norm == 0)
- norm = 1;
- assert(count[s] < total);
- cost += count[s] * kInverseProbabilityLog256[norm];
- }
- return cost >> 8;
-}
-
-
-/**
- * Returns the cost in bits of encoding the distribution in count using the
- * table described by norm. The max symbol support by norm is assumed >= max.
- * norm must be valid for every symbol with non-zero probability in count.
- */
-static size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
- unsigned const* count, unsigned const max)
-{
- unsigned const shift = 8 - accuracyLog;
- size_t cost = 0;
- unsigned s;
- assert(accuracyLog <= 8);
- for (s = 0; s <= max; ++s) {
- unsigned const normAcc = norm[s] != -1 ? norm[s] : 1;
- unsigned const norm256 = normAcc << shift;
- assert(norm256 > 0);
- assert(norm256 < 256);
- cost += count[s] * kInverseProbabilityLog256[norm256];
- }
- return cost >> 8;
-}
-
-
-static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) {
- void const* ptr = ctable;
- U16 const* u16ptr = (U16 const*)ptr;
- U32 const maxSymbolValue = MEM_read16(u16ptr + 1);
- return maxSymbolValue;
-}
-
-
-/**
- * Returns the cost in bits of encoding the distribution in count using ctable.
- * Returns an error if ctable cannot represent all the symbols in count.
- */
-static size_t ZSTD_fseBitCost(
- FSE_CTable const* ctable,
- unsigned const* count,
- unsigned const max)
-{
- unsigned const kAccuracyLog = 8;
- size_t cost = 0;
- unsigned s;
- FSE_CState_t cstate;
- FSE_initCState(&cstate, ctable);
- RETURN_ERROR_IF(ZSTD_getFSEMaxSymbolValue(ctable) < max, GENERIC,
- "Repeat FSE_CTable has maxSymbolValue %u < %u",
- ZSTD_getFSEMaxSymbolValue(ctable), max);
- for (s = 0; s <= max; ++s) {
- unsigned const tableLog = cstate.stateLog;
- unsigned const badCost = (tableLog + 1) << kAccuracyLog;
- unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog);
- if (count[s] == 0)
- continue;
- RETURN_ERROR_IF(bitCost >= badCost, GENERIC,
- "Repeat FSE_CTable has Prob[%u] == 0", s);
- cost += count[s] * bitCost;
- }
- return cost >> kAccuracyLog;
-}
-
-/**
- * Returns the cost in bytes of encoding the normalized count header.
- * Returns an error if any of the helper functions return an error.
- */
-static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max,
- size_t const nbSeq, unsigned const FSELog)
-{
- BYTE wksp[FSE_NCOUNTBOUND];
- S16 norm[MaxSeq + 1];
- const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
- FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max));
- return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog);
-}
-
-
-typedef enum {
- ZSTD_defaultDisallowed = 0,
- ZSTD_defaultAllowed = 1
-} ZSTD_defaultPolicy_e;
-
-MEM_STATIC symbolEncodingType_e
-ZSTD_selectEncodingType(
- FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
- size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
- FSE_CTable const* prevCTable,
- short const* defaultNorm, U32 defaultNormLog,
- ZSTD_defaultPolicy_e const isDefaultAllowed,
- ZSTD_strategy const strategy)
-{
- ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0);
- if (mostFrequent == nbSeq) {
- *repeatMode = FSE_repeat_none;
- if (isDefaultAllowed && nbSeq <= 2) {
- /* Prefer set_basic over set_rle when there are 2 or less symbols,
- * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
- * If basic encoding isn't possible, always choose RLE.
- */
- DEBUGLOG(5, "Selected set_basic");
- return set_basic;
- }
- DEBUGLOG(5, "Selected set_rle");
- return set_rle;
- }
- if (strategy < ZSTD_lazy) {
- if (isDefaultAllowed) {
- size_t const staticFse_nbSeq_max = 1000;
- size_t const mult = 10 - strategy;
- size_t const baseLog = 3;
- size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog; /* 28-36 for offset, 56-72 for lengths */
- assert(defaultNormLog >= 5 && defaultNormLog <= 6); /* xx_DEFAULTNORMLOG */
- assert(mult <= 9 && mult >= 7);
- if ( (*repeatMode == FSE_repeat_valid)
- && (nbSeq < staticFse_nbSeq_max) ) {
- DEBUGLOG(5, "Selected set_repeat");
- return set_repeat;
- }
- if ( (nbSeq < dynamicFse_nbSeq_min)
- || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) {
- DEBUGLOG(5, "Selected set_basic");
- /* The format allows default tables to be repeated, but it isn't useful.
- * When using simple heuristics to select encoding type, we don't want
- * to confuse these tables with dictionaries. When running more careful
- * analysis, we don't need to waste time checking both repeating tables
- * and default tables.
- */
- *repeatMode = FSE_repeat_none;
- return set_basic;
- }
- }
- } else {
- size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC);
- size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC);
- size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog);
- size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq);
-
- if (isDefaultAllowed) {
- assert(!ZSTD_isError(basicCost));
- assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost)));
- }
- assert(!ZSTD_isError(NCountCost));
- assert(compressedCost < ERROR(maxCode));
- DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u",
- (unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost);
- if (basicCost <= repeatCost && basicCost <= compressedCost) {
- DEBUGLOG(5, "Selected set_basic");
- assert(isDefaultAllowed);
- *repeatMode = FSE_repeat_none;
- return set_basic;
- }
- if (repeatCost <= compressedCost) {
- DEBUGLOG(5, "Selected set_repeat");
- assert(!ZSTD_isError(repeatCost));
- return set_repeat;
- }
- assert(compressedCost < basicCost && compressedCost < repeatCost);
- }
- DEBUGLOG(5, "Selected set_compressed");
- *repeatMode = FSE_repeat_check;
- return set_compressed;
-}
-
-MEM_STATIC size_t
-ZSTD_buildCTable(void* dst, size_t dstCapacity,
- FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
- unsigned* count, U32 max,
- const BYTE* codeTable, size_t nbSeq,
- const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
- const FSE_CTable* prevCTable, size_t prevCTableSize,
- void* workspace, size_t workspaceSize)
-{
- BYTE* op = (BYTE*)dst;
- const BYTE* const oend = op + dstCapacity;
- DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity);
-
- switch (type) {
- case set_rle:
- FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max));
- RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall);
- *op = codeTable[0];
- return 1;
- case set_repeat:
- memcpy(nextCTable, prevCTable, prevCTableSize);
- return 0;
- case set_basic:
- FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, workspace, workspaceSize)); /* note : could be pre-calculated */
- return 0;
- case set_compressed: {
- S16 norm[MaxSeq + 1];
- size_t nbSeq_1 = nbSeq;
- const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
- if (count[codeTable[nbSeq-1]] > 1) {
- count[codeTable[nbSeq-1]]--;
- nbSeq_1--;
- }
- assert(nbSeq_1 > 1);
- FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max));
- { size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */
- FORWARD_IF_ERROR(NCountSize);
- FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, workspace, workspaceSize));
- return NCountSize;
- }
- }
- default: assert(0); RETURN_ERROR(GENERIC);
- }
-}
-
-FORCE_INLINE_TEMPLATE size_t
-ZSTD_encodeSequences_body(
- void* dst, size_t dstCapacity,
- FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
- FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
- FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
- seqDef const* sequences, size_t nbSeq, int longOffsets)
-{
- BIT_CStream_t blockStream;
- FSE_CState_t stateMatchLength;
- FSE_CState_t stateOffsetBits;
- FSE_CState_t stateLitLength;
-
- RETURN_ERROR_IF(
- ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)),
- dstSize_tooSmall, "not enough space remaining");
- DEBUGLOG(6, "available space for bitstream : %i (dstCapacity=%u)",
- (int)(blockStream.endPtr - blockStream.startPtr),
- (unsigned)dstCapacity);
-
- /* first symbols */
- FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
- FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]);
- FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]);
- BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
- if (MEM_32bits()) BIT_flushBits(&blockStream);
- BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
- if (MEM_32bits()) BIT_flushBits(&blockStream);
- if (longOffsets) {
- U32 const ofBits = ofCodeTable[nbSeq-1];
- int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
- if (extraBits) {
- BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
- BIT_flushBits(&blockStream);
- }
- BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
- ofBits - extraBits);
- } else {
- BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
- }
- BIT_flushBits(&blockStream);
-
- { size_t n;
- for (n=nbSeq-2 ; n<nbSeq ; n--) { /* intentional underflow */
- BYTE const llCode = llCodeTable[n];
- BYTE const ofCode = ofCodeTable[n];
- BYTE const mlCode = mlCodeTable[n];
- U32 const llBits = LL_bits[llCode];
- U32 const ofBits = ofCode;
- U32 const mlBits = ML_bits[mlCode];
- DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
- (unsigned)sequences[n].litLength,
- (unsigned)sequences[n].matchLength + MINMATCH,
- (unsigned)sequences[n].offset);
- /* 32b*/ /* 64b*/
- /* (7)*/ /* (7)*/
- FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */
- FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */
- if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
- FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */
- if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
- BIT_flushBits(&blockStream); /* (7)*/
- BIT_addBits(&blockStream, sequences[n].litLength, llBits);
- if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
- BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
- if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
- if (longOffsets) {
- int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
- if (extraBits) {
- BIT_addBits(&blockStream, sequences[n].offset, extraBits);
- BIT_flushBits(&blockStream); /* (7)*/
- }
- BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
- ofBits - extraBits); /* 31 */
- } else {
- BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */
- }
- BIT_flushBits(&blockStream); /* (7)*/
- DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr));
- } }
-
- DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog);
- FSE_flushCState(&blockStream, &stateMatchLength);
- DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog);
- FSE_flushCState(&blockStream, &stateOffsetBits);
- DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog);
- FSE_flushCState(&blockStream, &stateLitLength);
-
- { size_t const streamSize = BIT_closeCStream(&blockStream);
- RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space");
- return streamSize;
- }
-}
-
-static size_t
-ZSTD_encodeSequences_default(
- void* dst, size_t dstCapacity,
- FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
- FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
- FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
- seqDef const* sequences, size_t nbSeq, int longOffsets)
-{
- return ZSTD_encodeSequences_body(dst, dstCapacity,
- CTable_MatchLength, mlCodeTable,
- CTable_OffsetBits, ofCodeTable,
- CTable_LitLength, llCodeTable,
- sequences, nbSeq, longOffsets);
-}
-
-
-#if DYNAMIC_BMI2
-
-static TARGET_ATTRIBUTE("bmi2") size_t
-ZSTD_encodeSequences_bmi2(
- void* dst, size_t dstCapacity,
- FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
- FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
- FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
- seqDef const* sequences, size_t nbSeq, int longOffsets)
+/* ZSTD_useTargetCBlockSize():
+ * Returns if target compressed block size param is being used.
+ * If used, compression will do best effort to make a compressed block size to be around targetCBlockSize.
+ * Returns 1 if true, 0 otherwise. */
+static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams)
{
- return ZSTD_encodeSequences_body(dst, dstCapacity,
- CTable_MatchLength, mlCodeTable,
- CTable_OffsetBits, ofCodeTable,
- CTable_LitLength, llCodeTable,
- sequences, nbSeq, longOffsets);
-}
-
-#endif
-
-static size_t ZSTD_encodeSequences(
- void* dst, size_t dstCapacity,
- FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
- FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
- FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
- seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2)
-{
- DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity);
-#if DYNAMIC_BMI2
- if (bmi2) {
- return ZSTD_encodeSequences_bmi2(dst, dstCapacity,
- CTable_MatchLength, mlCodeTable,
- CTable_OffsetBits, ofCodeTable,
- CTable_LitLength, llCodeTable,
- sequences, nbSeq, longOffsets);
- }
-#endif
- (void)bmi2;
- return ZSTD_encodeSequences_default(dst, dstCapacity,
- CTable_MatchLength, mlCodeTable,
- CTable_OffsetBits, ofCodeTable,
- CTable_LitLength, llCodeTable,
- sequences, nbSeq, longOffsets);
-}
-
-static int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams)
-{
- switch (cctxParams->literalCompressionMode) {
- case ZSTD_lcm_huffman:
- return 0;
- case ZSTD_lcm_uncompressed:
- return 1;
- default:
- assert(0 /* impossible: pre-validated */);
- /* fall-through */
- case ZSTD_lcm_auto:
- return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
- }
+ DEBUGLOG(5, "ZSTD_useTargetCBlockSize (targetCBlockSize=%zu)", cctxParams->targetCBlockSize);
+ return (cctxParams->targetCBlockSize != 0);
}
/* ZSTD_compressSequences_internal():
@@ -2475,7 +1966,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
ZSTD_entropyCTables_t* nextEntropy,
const ZSTD_CCtx_params* cctxParams,
void* dst, size_t dstCapacity,
- void* workspace, size_t wkspSize,
+ void* entropyWorkspace, size_t entropyWkspSize,
const int bmi2)
{
const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
@@ -2492,52 +1983,59 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
BYTE* const ostart = (BYTE*)dst;
BYTE* const oend = ostart + dstCapacity;
BYTE* op = ostart;
- size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
+ size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
BYTE* seqHead;
BYTE* lastNCount = NULL;
+ DEBUGLOG(5, "ZSTD_compressSequences_internal (nbSeq=%zu)", nbSeq);
ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
- DEBUGLOG(5, "ZSTD_compressSequences_internal");
/* Compress literals */
{ const BYTE* const literals = seqStorePtr->litStart;
- size_t const litSize = seqStorePtr->lit - literals;
+ size_t const litSize = (size_t)(seqStorePtr->lit - literals);
size_t const cSize = ZSTD_compressLiterals(
&prevEntropy->huf, &nextEntropy->huf,
cctxParams->cParams.strategy,
ZSTD_disableLiteralsCompression(cctxParams),
op, dstCapacity,
literals, litSize,
- workspace, wkspSize,
+ entropyWorkspace, entropyWkspSize,
bmi2);
- FORWARD_IF_ERROR(cSize);
+ FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed");
assert(cSize <= dstCapacity);
op += cSize;
}
/* Sequences Header */
RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
- dstSize_tooSmall);
- if (nbSeq < 0x7F)
+ dstSize_tooSmall, "Can't fit seq hdr in output buf!");
+ if (nbSeq < 128) {
*op++ = (BYTE)nbSeq;
- else if (nbSeq < LONGNBSEQ)
- op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
- else
- op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
+ } else if (nbSeq < LONGNBSEQ) {
+ op[0] = (BYTE)((nbSeq>>8) + 0x80);
+ op[1] = (BYTE)nbSeq;
+ op+=2;
+ } else {
+ op[0]=0xFF;
+ MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ));
+ op+=3;
+ }
+ assert(op <= oend);
if (nbSeq==0) {
/* Copy the old tables over as if we repeated them */
memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse));
- return op - ostart;
+ return (size_t)(op - ostart);
}
/* seqHead : flags for FSE encoding type */
seqHead = op++;
+ assert(op <= oend);
/* convert length/distances into codes */
ZSTD_seqToCodes(seqStorePtr);
/* build CTable for Literal Lengths */
{ unsigned max = MaxLL;
- size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
+ size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */
DEBUGLOG(5, "Building LL table");
nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode;
LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode,
@@ -2547,18 +2045,24 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
ZSTD_defaultAllowed, strategy);
assert(set_basic < set_compressed && set_rle < set_compressed);
assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
- { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
- count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
- prevEntropy->fse.litlengthCTable, sizeof(prevEntropy->fse.litlengthCTable),
- workspace, wkspSize);
- FORWARD_IF_ERROR(countSize);
+ { size_t const countSize = ZSTD_buildCTable(
+ op, (size_t)(oend - op),
+ CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
+ count, max, llCodeTable, nbSeq,
+ LL_defaultNorm, LL_defaultNormLog, MaxLL,
+ prevEntropy->fse.litlengthCTable,
+ sizeof(prevEntropy->fse.litlengthCTable),
+ entropyWorkspace, entropyWkspSize);
+ FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed");
if (LLtype == set_compressed)
lastNCount = op;
op += countSize;
+ assert(op <= oend);
} }
/* build CTable for Offsets */
{ unsigned max = MaxOff;
- size_t const mostFrequent = HIST_countFast_wksp(count, &max, ofCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
+ size_t const mostFrequent = HIST_countFast_wksp(
+ count, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */
/* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
DEBUGLOG(5, "Building OF table");
@@ -2569,18 +2073,24 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
OF_defaultNorm, OF_defaultNormLog,
defaultPolicy, strategy);
assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
- { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
- count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
- prevEntropy->fse.offcodeCTable, sizeof(prevEntropy->fse.offcodeCTable),
- workspace, wkspSize);
- FORWARD_IF_ERROR(countSize);
+ { size_t const countSize = ZSTD_buildCTable(
+ op, (size_t)(oend - op),
+ CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
+ count, max, ofCodeTable, nbSeq,
+ OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
+ prevEntropy->fse.offcodeCTable,
+ sizeof(prevEntropy->fse.offcodeCTable),
+ entropyWorkspace, entropyWkspSize);
+ FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed");
if (Offtype == set_compressed)
lastNCount = op;
op += countSize;
+ assert(op <= oend);
} }
/* build CTable for MatchLengths */
{ unsigned max = MaxML;
- size_t const mostFrequent = HIST_countFast_wksp(count, &max, mlCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
+ size_t const mostFrequent = HIST_countFast_wksp(
+ count, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */
DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode;
MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode,
@@ -2589,27 +2099,33 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
ML_defaultNorm, ML_defaultNormLog,
ZSTD_defaultAllowed, strategy);
assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
- { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
- count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
- prevEntropy->fse.matchlengthCTable, sizeof(prevEntropy->fse.matchlengthCTable),
- workspace, wkspSize);
- FORWARD_IF_ERROR(countSize);
+ { size_t const countSize = ZSTD_buildCTable(
+ op, (size_t)(oend - op),
+ CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
+ count, max, mlCodeTable, nbSeq,
+ ML_defaultNorm, ML_defaultNormLog, MaxML,
+ prevEntropy->fse.matchlengthCTable,
+ sizeof(prevEntropy->fse.matchlengthCTable),
+ entropyWorkspace, entropyWkspSize);
+ FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed");
if (MLtype == set_compressed)
lastNCount = op;
op += countSize;
+ assert(op <= oend);
} }
*seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
{ size_t const bitstreamSize = ZSTD_encodeSequences(
- op, oend - op,
+ op, (size_t)(oend - op),
CTable_MatchLength, mlCodeTable,
CTable_OffsetBits, ofCodeTable,
CTable_LitLength, llCodeTable,
sequences, nbSeq,
longOffsets, bmi2);
- FORWARD_IF_ERROR(bitstreamSize);
+ FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed");
op += bitstreamSize;
+ assert(op <= oend);
/* zstd versions <= 1.3.4 mistakenly report corruption when
* FSE_readNCount() receives a buffer < 4 bytes.
* Fixed by https://github.com/facebook/zstd/pull/1146.
@@ -2628,7 +2144,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
}
DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart));
- return op - ostart;
+ return (size_t)(op - ostart);
}
MEM_STATIC size_t
@@ -2638,20 +2154,20 @@ ZSTD_compressSequences(seqStore_t* seqStorePtr,
const ZSTD_CCtx_params* cctxParams,
void* dst, size_t dstCapacity,
size_t srcSize,
- void* workspace, size_t wkspSize,
+ void* entropyWorkspace, size_t entropyWkspSize,
int bmi2)
{
size_t const cSize = ZSTD_compressSequences_internal(
seqStorePtr, prevEntropy, nextEntropy, cctxParams,
dst, dstCapacity,
- workspace, wkspSize, bmi2);
+ entropyWorkspace, entropyWkspSize, bmi2);
if (cSize == 0) return 0;
/* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block.
* Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block.
*/
if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity))
return 0; /* block not compressed */
- FORWARD_IF_ERROR(cSize);
+ FORWARD_IF_ERROR(cSize, "ZSTD_compressSequences_internal failed");
/* Check compressibility */
{ size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy);
@@ -2721,30 +2237,24 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr)
ssPtr->longLengthID = 0;
}
-static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
- void* dst, size_t dstCapacity,
- const void* src, size_t srcSize)
+typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e;
+
+static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
{
ZSTD_matchState_t* const ms = &zc->blockState.matchState;
- size_t cSize;
- DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
- (unsigned)dstCapacity, (unsigned)ms->window.dictLimit, (unsigned)ms->nextToUpdate);
+ DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)", srcSize);
assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
-
/* Assert that we have correctly flushed the ctx params into the ms's copy */
ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams);
-
if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch);
- cSize = 0;
- goto out; /* don't even attempt compression below a certain srcSize */
+ return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */
}
ZSTD_resetSeqStore(&(zc->seqStore));
/* required for optimal parser to read stats from dictionary */
ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy;
/* tell the optimal parser how we expect to compress literals */
ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode;
-
/* a gap between an attached dict and the current window is not safe,
* they must remain adjacent,
* and when that stops being the case, the dict must be unset */
@@ -2783,7 +2293,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
/* Updates ldmSeqStore.size */
FORWARD_IF_ERROR(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore,
&zc->appliedParams.ldmParams,
- src, srcSize));
+ src, srcSize), "");
/* Updates ldmSeqStore.pos */
lastLLSize =
ZSTD_ldm_blockCompress(&ldmSeqStore,
@@ -2798,6 +2308,134 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
{ const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize;
ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize);
} }
+ return ZSTDbss_compress;
+}
+
+static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
+{
+ const seqStore_t* seqStore = ZSTD_getSeqStore(zc);
+ const seqDef* seqs = seqStore->sequencesStart;
+ size_t seqsSize = seqStore->sequences - seqs;
+
+ ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex];
+ size_t i; size_t position; int repIdx;
+
+ assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences);
+ for (i = 0, position = 0; i < seqsSize; ++i) {
+ outSeqs[i].offset = seqs[i].offset;
+ outSeqs[i].litLength = seqs[i].litLength;
+ outSeqs[i].matchLength = seqs[i].matchLength + MINMATCH;
+
+ if (i == seqStore->longLengthPos) {
+ if (seqStore->longLengthID == 1) {
+ outSeqs[i].litLength += 0x10000;
+ } else if (seqStore->longLengthID == 2) {
+ outSeqs[i].matchLength += 0x10000;
+ }
+ }
+
+ if (outSeqs[i].offset <= ZSTD_REP_NUM) {
+ outSeqs[i].rep = outSeqs[i].offset;
+ repIdx = (unsigned int)i - outSeqs[i].offset;
+
+ if (outSeqs[i].litLength == 0) {
+ if (outSeqs[i].offset < 3) {
+ --repIdx;
+ } else {
+ repIdx = (unsigned int)i - 1;
+ }
+ ++outSeqs[i].rep;
+ }
+ assert(repIdx >= -3);
+ outSeqs[i].offset = repIdx >= 0 ? outSeqs[repIdx].offset : repStartValue[-repIdx - 1];
+ if (outSeqs[i].rep == 4) {
+ --outSeqs[i].offset;
+ }
+ } else {
+ outSeqs[i].offset -= ZSTD_REP_NUM;
+ }
+
+ position += outSeqs[i].litLength;
+ outSeqs[i].matchPos = (unsigned int)position;
+ position += outSeqs[i].matchLength;
+ }
+ zc->seqCollector.seqIndex += seqsSize;
+}
+
+size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
+ size_t outSeqsSize, const void* src, size_t srcSize)
+{
+ const size_t dstCapacity = ZSTD_compressBound(srcSize);
+ void* dst = ZSTD_malloc(dstCapacity, ZSTD_defaultCMem);
+ SeqCollector seqCollector;
+
+ RETURN_ERROR_IF(dst == NULL, memory_allocation, "NULL pointer!");
+
+ seqCollector.collectSequences = 1;
+ seqCollector.seqStart = outSeqs;
+ seqCollector.seqIndex = 0;
+ seqCollector.maxSequences = outSeqsSize;
+ zc->seqCollector = seqCollector;
+
+ ZSTD_compress2(zc, dst, dstCapacity, src, srcSize);
+ ZSTD_free(dst, ZSTD_defaultCMem);
+ return zc->seqCollector.seqIndex;
+}
+
+/* Returns true if the given block is a RLE block */
+static int ZSTD_isRLE(const BYTE *ip, size_t length) {
+ size_t i;
+ if (length < 2) return 1;
+ for (i = 1; i < length; ++i) {
+ if (ip[0] != ip[i]) return 0;
+ }
+ return 1;
+}
+
+/* Returns true if the given block may be RLE.
+ * This is just a heuristic based on the compressibility.
+ * It may return both false positives and false negatives.
+ */
+static int ZSTD_maybeRLE(seqStore_t const* seqStore)
+{
+ size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart);
+ size_t const nbLits = (size_t)(seqStore->lit - seqStore->litStart);
+
+ return nbSeqs < 4 && nbLits < 10;
+}
+
+static void ZSTD_confirmRepcodesAndEntropyTables(ZSTD_CCtx* zc)
+{
+ ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock;
+ zc->blockState.prevCBlock = zc->blockState.nextCBlock;
+ zc->blockState.nextCBlock = tmp;
+}
+
+static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize, U32 frame)
+{
+ /* This the upper bound for the length of an rle block.
+ * This isn't the actual upper bound. Finding the real threshold
+ * needs further investigation.
+ */
+ const U32 rleMaxLength = 25;
+ size_t cSize;
+ const BYTE* ip = (const BYTE*)src;
+ BYTE* op = (BYTE*)dst;
+ DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
+ (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
+ (unsigned)zc->blockState.matchState.nextToUpdate);
+
+ { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
+ FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
+ if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; }
+ }
+
+ if (zc->seqCollector.collectSequences) {
+ ZSTD_copyBlockSequences(zc);
+ return 0;
+ }
/* encode sequences and literals */
cSize = ZSTD_compressSequences(&zc->seqStore,
@@ -2808,12 +2446,22 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
zc->bmi2);
+ if (frame &&
+ /* We don't want to emit our first block as a RLE even if it qualifies because
+ * doing so will cause the decoder (cli only) to throw a "should consume all input error."
+ * This is only an issue for zstd <= v1.4.3
+ */
+ !zc->isFirstBlock &&
+ cSize < rleMaxLength &&
+ ZSTD_isRLE(ip, srcSize))
+ {
+ cSize = 1;
+ op[0] = ip[0];
+ }
+
out:
- if (!ZSTD_isError(cSize) && cSize != 0) {
- /* confirm repcodes and entropy tables when emitting a compressed block */
- ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock;
- zc->blockState.prevCBlock = zc->blockState.nextCBlock;
- zc->blockState.nextCBlock = tmp;
+ if (!ZSTD_isError(cSize) && cSize > 1) {
+ ZSTD_confirmRepcodesAndEntropyTables(zc);
}
/* We check that dictionaries have offset codes available for the first
* block. After the first block, the offcode table might not have large
@@ -2825,6 +2473,104 @@ out:
return cSize;
}
+static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize,
+ const size_t bss, U32 lastBlock)
+{
+ DEBUGLOG(6, "Attempting ZSTD_compressSuperBlock()");
+ if (bss == ZSTDbss_compress) {
+ if (/* We don't want to emit our first block as a RLE even if it qualifies because
+ * doing so will cause the decoder (cli only) to throw a "should consume all input error."
+ * This is only an issue for zstd <= v1.4.3
+ */
+ !zc->isFirstBlock &&
+ ZSTD_maybeRLE(&zc->seqStore) &&
+ ZSTD_isRLE((BYTE const*)src, srcSize))
+ {
+ return ZSTD_rleCompressBlock(dst, dstCapacity, *(BYTE const*)src, srcSize, lastBlock);
+ }
+ /* Attempt superblock compression.
+ *
+ * Note that compressed size of ZSTD_compressSuperBlock() is not bound by the
+ * standard ZSTD_compressBound(). This is a problem, because even if we have
+ * space now, taking an extra byte now could cause us to run out of space later
+ * and violate ZSTD_compressBound().
+ *
+ * Define blockBound(blockSize) = blockSize + ZSTD_blockHeaderSize.
+ *
+ * In order to respect ZSTD_compressBound() we must attempt to emit a raw
+ * uncompressed block in these cases:
+ * * cSize == 0: Return code for an uncompressed block.
+ * * cSize == dstSize_tooSmall: We may have expanded beyond blockBound(srcSize).
+ * ZSTD_noCompressBlock() will return dstSize_tooSmall if we are really out of
+ * output space.
+ * * cSize >= blockBound(srcSize): We have expanded the block too much so
+ * emit an uncompressed block.
+ */
+ {
+ size_t const cSize = ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock);
+ if (cSize != ERROR(dstSize_tooSmall)) {
+ size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy);
+ FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed");
+ if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) {
+ ZSTD_confirmRepcodesAndEntropyTables(zc);
+ return cSize;
+ }
+ }
+ }
+ }
+
+ DEBUGLOG(6, "Resorting to ZSTD_noCompressBlock()");
+ /* Superblock compression failed, attempt to emit a single no compress block.
+ * The decoder will be able to stream this block since it is uncompressed.
+ */
+ return ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock);
+}
+
+static size_t ZSTD_compressBlock_targetCBlockSize(ZSTD_CCtx* zc,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize,
+ U32 lastBlock)
+{
+ size_t cSize = 0;
+ const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
+ DEBUGLOG(5, "ZSTD_compressBlock_targetCBlockSize (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u, srcSize=%zu)",
+ (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate, srcSize);
+ FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
+
+ cSize = ZSTD_compressBlock_targetCBlockSize_body(zc, dst, dstCapacity, src, srcSize, bss, lastBlock);
+ FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize_body failed");
+
+ if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
+ zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
+
+ return cSize;
+}
+
+static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms,
+ ZSTD_cwksp* ws,
+ ZSTD_CCtx_params const* params,
+ void const* ip,
+ void const* iend)
+{
+ if (ZSTD_window_needOverflowCorrection(ms->window, iend)) {
+ U32 const maxDist = (U32)1 << params->cParams.windowLog;
+ U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy);
+ U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip);
+ ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
+ ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
+ ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
+ ZSTD_cwksp_mark_tables_dirty(ws);
+ ZSTD_reduceIndex(ms, params, correction);
+ ZSTD_cwksp_mark_tables_clean(ws);
+ if (ms->nextToUpdate < correction) ms->nextToUpdate = 0;
+ else ms->nextToUpdate -= correction;
+ /* invalidate dictionaries on overflow correction */
+ ms->loadedDictEnd = 0;
+ ms->dictMatchState = NULL;
+ }
+}
/*! ZSTD_compress_frameChunk() :
* Compress a chunk of data into one or multiple blocks.
@@ -2844,7 +2590,8 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
BYTE* const ostart = (BYTE*)dst;
BYTE* op = ostart;
U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog;
- assert(cctx->appliedParams.cParams.windowLog <= 31);
+
+ assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX);
DEBUGLOG(5, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize);
if (cctx->appliedParams.fParams.checksumFlag && srcSize)
@@ -2859,70 +2606,75 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
"not enough space to store compressed block");
if (remaining < blockSize) blockSize = remaining;
- if (ZSTD_window_needOverflowCorrection(ms->window, ip + blockSize)) {
- U32 const cycleLog = ZSTD_cycleLog(cctx->appliedParams.cParams.chainLog, cctx->appliedParams.cParams.strategy);
- U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip);
- ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
- ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
- ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
- ZSTD_reduceIndex(cctx, correction);
- if (ms->nextToUpdate < correction) ms->nextToUpdate = 0;
- else ms->nextToUpdate -= correction;
- ms->loadedDictEnd = 0;
- ms->dictMatchState = NULL;
- }
- ZSTD_window_enforceMaxDist(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);
- if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit;
+ ZSTD_overflowCorrectIfNeeded(
+ ms, &cctx->workspace, &cctx->appliedParams, ip, ip + blockSize);
+ ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);
- { size_t cSize = ZSTD_compressBlock_internal(cctx,
- op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize,
- ip, blockSize);
- FORWARD_IF_ERROR(cSize);
+ /* Ensure hash/chain table insertion resumes no sooner than lowlimit */
+ if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit;
- if (cSize == 0) { /* block is not compressible */
- cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
- FORWARD_IF_ERROR(cSize);
+ { size_t cSize;
+ if (ZSTD_useTargetCBlockSize(&cctx->appliedParams)) {
+ cSize = ZSTD_compressBlock_targetCBlockSize(cctx, op, dstCapacity, ip, blockSize, lastBlock);
+ FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize failed");
+ assert(cSize > 0);
+ assert(cSize <= blockSize + ZSTD_blockHeaderSize);
} else {
- U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
- MEM_writeLE24(op, cBlockHeader24);
- cSize += ZSTD_blockHeaderSize;
+ cSize = ZSTD_compressBlock_internal(cctx,
+ op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize,
+ ip, blockSize, 1 /* frame */);
+ FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_internal failed");
+
+ if (cSize == 0) { /* block is not compressible */
+ cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
+ FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
+ } else {
+ U32 const cBlockHeader = cSize == 1 ?
+ lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) :
+ lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
+ MEM_writeLE24(op, cBlockHeader);
+ cSize += ZSTD_blockHeaderSize;
+ }
}
+
ip += blockSize;
assert(remaining >= blockSize);
remaining -= blockSize;
op += cSize;
assert(dstCapacity >= cSize);
dstCapacity -= cSize;
+ cctx->isFirstBlock = 0;
DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u",
(unsigned)cSize);
} }
if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending;
- return op-ostart;
+ return (size_t)(op-ostart);
}
static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
- ZSTD_CCtx_params params, U64 pledgedSrcSize, U32 dictID)
+ const ZSTD_CCtx_params* params, U64 pledgedSrcSize, U32 dictID)
{ BYTE* const op = (BYTE*)dst;
U32 const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */
- U32 const dictIDSizeCode = params.fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength; /* 0-3 */
- U32 const checksumFlag = params.fParams.checksumFlag>0;
- U32 const windowSize = (U32)1 << params.cParams.windowLog;
- U32 const singleSegment = params.fParams.contentSizeFlag && (windowSize >= pledgedSrcSize);
- BYTE const windowLogByte = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3);
- U32 const fcsCode = params.fParams.contentSizeFlag ?
+ U32 const dictIDSizeCode = params->fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength; /* 0-3 */
+ U32 const checksumFlag = params->fParams.checksumFlag>0;
+ U32 const windowSize = (U32)1 << params->cParams.windowLog;
+ U32 const singleSegment = params->fParams.contentSizeFlag && (windowSize >= pledgedSrcSize);
+ BYTE const windowLogByte = (BYTE)((params->cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3);
+ U32 const fcsCode = params->fParams.contentSizeFlag ?
(pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0; /* 0-3 */
BYTE const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) );
size_t pos=0;
- assert(!(params.fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN));
- RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall);
+ assert(!(params->fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN));
+ RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall,
+ "dst buf is too small to fit worst-case frame header size.");
DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u",
- !params.fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode);
+ !params->fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode);
- if (params.format == ZSTD_f_zstd1) {
+ if (params->format == ZSTD_f_zstd1) {
MEM_writeLE32(dst, ZSTD_MAGICNUMBER);
pos = 4;
}
@@ -2954,7 +2706,8 @@ static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
*/
size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity)
{
- RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall);
+ RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall,
+ "dst buf is too small to write frame trailer empty block.");
{ U32 const cBlockHeader24 = 1 /*lastBlock*/ + (((U32)bt_raw)<<1); /* 0 size */
MEM_writeLE24(dst, cBlockHeader24);
return ZSTD_blockHeaderSize;
@@ -2963,9 +2716,11 @@ size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity)
size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq)
{
- RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong);
+ RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong,
+ "wrong cctx stage");
RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm,
- parameter_unsupported);
+ parameter_unsupported,
+ "incompatible with ldm");
cctx->externSeqStore.seq = seq;
cctx->externSeqStore.size = nbSeq;
cctx->externSeqStore.capacity = nbSeq;
@@ -2988,9 +2743,10 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
"missing init (ZSTD_compressBegin)");
if (frame && (cctx->stage==ZSTDcs_init)) {
- fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams,
+ fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams,
cctx->pledgedSrcSizePlusOne-1, cctx->dictID);
- FORWARD_IF_ERROR(fhSize);
+ FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed");
+ assert(fhSize <= dstCapacity);
dstCapacity -= fhSize;
dst = (char*)dst + fhSize;
cctx->stage = ZSTDcs_ongoing;
@@ -3007,25 +2763,16 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
if (!frame) {
/* overflow check and correction for block mode */
- if (ZSTD_window_needOverflowCorrection(ms->window, (const char*)src + srcSize)) {
- U32 const cycleLog = ZSTD_cycleLog(cctx->appliedParams.cParams.chainLog, cctx->appliedParams.cParams.strategy);
- U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, 1 << cctx->appliedParams.cParams.windowLog, src);
- ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
- ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
- ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
- ZSTD_reduceIndex(cctx, correction);
- if (ms->nextToUpdate < correction) ms->nextToUpdate = 0;
- else ms->nextToUpdate -= correction;
- ms->loadedDictEnd = 0;
- ms->dictMatchState = NULL;
- }
+ ZSTD_overflowCorrectIfNeeded(
+ ms, &cctx->workspace, &cctx->appliedParams,
+ src, (BYTE const*)src + srcSize);
}
DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize);
{ size_t const cSize = frame ?
ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
- ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize);
- FORWARD_IF_ERROR(cSize);
+ ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */);
+ FORWARD_IF_ERROR(cSize, "%s", frame ? "ZSTD_compress_frameChunk failed" : "ZSTD_compressBlock_internal failed");
cctx->consumedSrcSize += srcSize;
cctx->producedCSize += (cSize + fhSize);
assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0));
@@ -3060,8 +2807,9 @@ size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx)
size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
- size_t const blockSizeMax = ZSTD_getBlockSize(cctx);
- RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong);
+ DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize);
+ { size_t const blockSizeMax = ZSTD_getBlockSize(cctx);
+ RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong, "input is larger than a block"); }
return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */);
}
@@ -3070,47 +2818,67 @@ size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const
* @return : 0, or an error code
*/
static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
+ ldmState_t* ls,
+ ZSTD_cwksp* ws,
ZSTD_CCtx_params const* params,
const void* src, size_t srcSize,
ZSTD_dictTableLoadMethod_e dtlm)
{
- const BYTE* const ip = (const BYTE*) src;
+ const BYTE* ip = (const BYTE*) src;
const BYTE* const iend = ip + srcSize;
ZSTD_window_update(&ms->window, src, srcSize);
ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);
+ if (params->ldmParams.enableLdm && ls != NULL) {
+ ZSTD_window_update(&ls->window, src, srcSize);
+ ls->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ls->window.base);
+ }
+
/* Assert that we the ms params match the params we're being given */
ZSTD_assertEqualCParams(params->cParams, ms->cParams);
if (srcSize <= HASH_READ_SIZE) return 0;
- switch(params->cParams.strategy)
- {
- case ZSTD_fast:
- ZSTD_fillHashTable(ms, iend, dtlm);
- break;
- case ZSTD_dfast:
- ZSTD_fillDoubleHashTable(ms, iend, dtlm);
- break;
+ while (iend - ip > HASH_READ_SIZE) {
+ size_t const remaining = (size_t)(iend - ip);
+ size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX);
+ const BYTE* const ichunk = ip + chunk;
- case ZSTD_greedy:
- case ZSTD_lazy:
- case ZSTD_lazy2:
- if (srcSize >= HASH_READ_SIZE)
- ZSTD_insertAndFindFirstIndex(ms, iend-HASH_READ_SIZE);
- break;
+ ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, ichunk);
- case ZSTD_btlazy2: /* we want the dictionary table fully sorted */
- case ZSTD_btopt:
- case ZSTD_btultra:
- case ZSTD_btultra2:
- if (srcSize >= HASH_READ_SIZE)
- ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend);
- break;
+ if (params->ldmParams.enableLdm && ls != NULL)
+ ZSTD_ldm_fillHashTable(ls, (const BYTE*)src, (const BYTE*)src + srcSize, &params->ldmParams);
- default:
- assert(0); /* not possible : not a valid strategy id */
+ switch(params->cParams.strategy)
+ {
+ case ZSTD_fast:
+ ZSTD_fillHashTable(ms, ichunk, dtlm);
+ break;
+ case ZSTD_dfast:
+ ZSTD_fillDoubleHashTable(ms, ichunk, dtlm);
+ break;
+
+ case ZSTD_greedy:
+ case ZSTD_lazy:
+ case ZSTD_lazy2:
+ if (chunk >= HASH_READ_SIZE)
+ ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE);
+ break;
+
+ case ZSTD_btlazy2: /* we want the dictionary table fully sorted */
+ case ZSTD_btopt:
+ case ZSTD_btultra:
+ case ZSTD_btultra2:
+ if (chunk >= HASH_READ_SIZE)
+ ZSTD_updateTree(ms, ichunk-HASH_READ_SIZE, ichunk);
+ break;
+
+ default:
+ assert(0); /* not possible : not a valid strategy id */
+ }
+
+ ip = ichunk;
}
ms->nextToUpdate = (U32)(iend - ms->window.base);
@@ -3124,101 +2892,123 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
NOTE: This behavior is not standard and could be improved in the future. */
static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) {
U32 s;
- RETURN_ERROR_IF(dictMaxSymbolValue < maxSymbolValue, dictionary_corrupted);
+ RETURN_ERROR_IF(dictMaxSymbolValue < maxSymbolValue, dictionary_corrupted, "dict fse tables don't have all symbols");
for (s = 0; s <= maxSymbolValue; ++s) {
- RETURN_ERROR_IF(normalizedCounter[s] == 0, dictionary_corrupted);
+ RETURN_ERROR_IF(normalizedCounter[s] == 0, dictionary_corrupted, "dict fse tables don't have all symbols");
}
return 0;
}
-
-/* Dictionary format :
- * See :
- * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format
- */
-/*! ZSTD_loadZstdDictionary() :
- * @return : dictID, or an error code
- * assumptions : magic number supposed already checked
- * dictSize supposed > 8
- */
-static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
- ZSTD_matchState_t* ms,
- ZSTD_CCtx_params const* params,
- const void* dict, size_t dictSize,
- ZSTD_dictTableLoadMethod_e dtlm,
- void* workspace)
+size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
+ short* offcodeNCount, unsigned* offcodeMaxValue,
+ const void* const dict, size_t dictSize)
{
- const BYTE* dictPtr = (const BYTE*)dict;
+ const BYTE* dictPtr = (const BYTE*)dict; /* skip magic num and dict ID */
const BYTE* const dictEnd = dictPtr + dictSize;
- short offcodeNCount[MaxOff+1];
- unsigned offcodeMaxValue = MaxOff;
- size_t dictID;
+ dictPtr += 8;
+ bs->entropy.huf.repeatMode = HUF_repeat_check;
- ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
- assert(dictSize > 8);
- assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY);
+ { unsigned maxSymbolValue = 255;
+ unsigned hasZeroWeights = 1;
+ size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr,
+ dictEnd-dictPtr, &hasZeroWeights);
- dictPtr += 4; /* skip magic number */
- dictID = params->fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr);
- dictPtr += 4;
+ /* We only set the loaded table as valid if it contains all non-zero
+ * weights. Otherwise, we set it to check */
+ if (!hasZeroWeights)
+ bs->entropy.huf.repeatMode = HUF_repeat_valid;
- { unsigned maxSymbolValue = 255;
- size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, dictEnd-dictPtr);
- RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted);
- RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted);
+ RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted, "");
+ RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted, "");
dictPtr += hufHeaderSize;
}
{ unsigned offcodeLog;
- size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
- RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted);
- RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted);
+ size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
+ RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, "");
+ RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, "");
/* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
/* fill all offset symbols to avoid garbage at end of table */
RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
bs->entropy.fse.offcodeCTable,
offcodeNCount, MaxOff, offcodeLog,
workspace, HUF_WORKSPACE_SIZE)),
- dictionary_corrupted);
+ dictionary_corrupted, "");
dictPtr += offcodeHeaderSize;
}
{ short matchlengthNCount[MaxML+1];
unsigned matchlengthMaxValue = MaxML, matchlengthLog;
size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
- RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted);
- RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted);
+ RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, "");
+ RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, "");
/* Every match length code must have non-zero probability */
- FORWARD_IF_ERROR( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML));
+ FORWARD_IF_ERROR( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML), "");
RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
bs->entropy.fse.matchlengthCTable,
matchlengthNCount, matchlengthMaxValue, matchlengthLog,
workspace, HUF_WORKSPACE_SIZE)),
- dictionary_corrupted);
+ dictionary_corrupted, "");
dictPtr += matchlengthHeaderSize;
}
{ short litlengthNCount[MaxLL+1];
unsigned litlengthMaxValue = MaxLL, litlengthLog;
size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
- RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted);
- RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted);
+ RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, "");
+ RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, "");
/* Every literal length code must have non-zero probability */
- FORWARD_IF_ERROR( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL));
+ FORWARD_IF_ERROR( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL), "");
RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
bs->entropy.fse.litlengthCTable,
litlengthNCount, litlengthMaxValue, litlengthLog,
workspace, HUF_WORKSPACE_SIZE)),
- dictionary_corrupted);
+ dictionary_corrupted, "");
dictPtr += litlengthHeaderSize;
}
- RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted);
+ RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, "");
bs->rep[0] = MEM_readLE32(dictPtr+0);
bs->rep[1] = MEM_readLE32(dictPtr+4);
bs->rep[2] = MEM_readLE32(dictPtr+8);
dictPtr += 12;
+ return dictPtr - (const BYTE*)dict;
+}
+
+/* Dictionary format :
+ * See :
+ * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format
+ */
+/*! ZSTD_loadZstdDictionary() :
+ * @return : dictID, or an error code
+ * assumptions : magic number supposed already checked
+ * dictSize supposed >= 8
+ */
+static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
+ ZSTD_matchState_t* ms,
+ ZSTD_cwksp* ws,
+ ZSTD_CCtx_params const* params,
+ const void* dict, size_t dictSize,
+ ZSTD_dictTableLoadMethod_e dtlm,
+ void* workspace)
+{
+ const BYTE* dictPtr = (const BYTE*)dict;
+ const BYTE* const dictEnd = dictPtr + dictSize;
+ short offcodeNCount[MaxOff+1];
+ unsigned offcodeMaxValue = MaxOff;
+ size_t dictID;
+ size_t eSize;
+
+ ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
+ assert(dictSize >= 8);
+ assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY);
+
+ dictID = params->fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr + 4 /* skip magic number */ );
+ eSize = ZSTD_loadCEntropy(bs, workspace, offcodeNCount, &offcodeMaxValue, dict, dictSize);
+ FORWARD_IF_ERROR(eSize, "ZSTD_loadCEntropy failed");
+ dictPtr += eSize;
+
{ size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
U32 offcodeMax = MaxOff;
if (dictContentSize <= ((U32)-1) - 128 KB) {
@@ -3226,19 +3016,19 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */
}
/* All offset values <= dictContentSize + 128 KB must be representable */
- FORWARD_IF_ERROR(ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)));
+ FORWARD_IF_ERROR(ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)), "");
/* All repCodes must be <= dictContentSize and != 0*/
{ U32 u;
for (u=0; u<3; u++) {
- RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted);
- RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted);
+ RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted, "");
+ RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted, "");
} }
- bs->entropy.huf.repeatMode = HUF_repeat_valid;
bs->entropy.fse.offcode_repeatMode = FSE_repeat_valid;
bs->entropy.fse.matchlength_repeatMode = FSE_repeat_valid;
bs->entropy.fse.litlength_repeatMode = FSE_repeat_valid;
- FORWARD_IF_ERROR(ZSTD_loadDictionaryContent(ms, params, dictPtr, dictContentSize, dtlm));
+ FORWARD_IF_ERROR(ZSTD_loadDictionaryContent(
+ ms, NULL, ws, params, dictPtr, dictContentSize, dtlm), "");
return dictID;
}
}
@@ -3248,6 +3038,8 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
static size_t
ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
ZSTD_matchState_t* ms,
+ ldmState_t* ls,
+ ZSTD_cwksp* ws,
const ZSTD_CCtx_params* params,
const void* dict, size_t dictSize,
ZSTD_dictContentType_e dictContentType,
@@ -3255,27 +3047,35 @@ ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
void* workspace)
{
DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize);
- if ((dict==NULL) || (dictSize<=8)) return 0;
+ if ((dict==NULL) || (dictSize<8)) {
+ RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, "");
+ return 0;
+ }
ZSTD_reset_compressedBlockState(bs);
/* dict restricted modes */
if (dictContentType == ZSTD_dct_rawContent)
- return ZSTD_loadDictionaryContent(ms, params, dict, dictSize, dtlm);
+ return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm);
if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) {
if (dictContentType == ZSTD_dct_auto) {
DEBUGLOG(4, "raw content dictionary detected");
- return ZSTD_loadDictionaryContent(ms, params, dict, dictSize, dtlm);
+ return ZSTD_loadDictionaryContent(
+ ms, ls, ws, params, dict, dictSize, dtlm);
}
- RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong);
+ RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, "");
assert(0); /* impossible */
}
/* dict as full zstd dictionary */
- return ZSTD_loadZstdDictionary(bs, ms, params, dict, dictSize, dtlm, workspace);
+ return ZSTD_loadZstdDictionary(
+ bs, ms, ws, params, dict, dictSize, dtlm, workspace);
}
+#define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB)
+#define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6)
+
/*! ZSTD_compressBegin_internal() :
* @return : 0, or an error code */
static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
@@ -3283,26 +3083,37 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
ZSTD_dictContentType_e dictContentType,
ZSTD_dictTableLoadMethod_e dtlm,
const ZSTD_CDict* cdict,
- ZSTD_CCtx_params params, U64 pledgedSrcSize,
+ const ZSTD_CCtx_params* params, U64 pledgedSrcSize,
ZSTD_buffered_policy_e zbuff)
{
- DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params.cParams.windowLog);
+ DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog);
/* params are supposed to be fully validated at this point */
- assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
+ assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
assert(!((dict) && (cdict))); /* either dict or cdict, not both */
-
- if (cdict && cdict->dictContentSize>0) {
+ if ( (cdict)
+ && (cdict->dictContentSize > 0)
+ && ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF
+ || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER
+ || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
+ || cdict->compressionLevel == 0)
+ && (params->attachDictPref != ZSTD_dictForceLoad) ) {
return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff);
}
- FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
- ZSTDcrp_continue, zbuff) );
- {
- size_t const dictID = ZSTD_compress_insertDictionary(
- cctx->blockState.prevCBlock, &cctx->blockState.matchState,
- &params, dict, dictSize, dictContentType, dtlm, cctx->entropyWorkspace);
- FORWARD_IF_ERROR(dictID);
- assert(dictID <= (size_t)(U32)-1);
+ FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, *params, pledgedSrcSize,
+ ZSTDcrp_makeClean, zbuff) , "");
+ { size_t const dictID = cdict ?
+ ZSTD_compress_insertDictionary(
+ cctx->blockState.prevCBlock, &cctx->blockState.matchState,
+ &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent,
+ cdict->dictContentSize, dictContentType, dtlm,
+ cctx->entropyWorkspace)
+ : ZSTD_compress_insertDictionary(
+ cctx->blockState.prevCBlock, &cctx->blockState.matchState,
+ &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, dict, dictSize,
+ dictContentType, dtlm, cctx->entropyWorkspace);
+ FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
+ assert(dictID <= UINT_MAX);
cctx->dictID = (U32)dictID;
}
return 0;
@@ -3313,12 +3124,12 @@ size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
ZSTD_dictContentType_e dictContentType,
ZSTD_dictTableLoadMethod_e dtlm,
const ZSTD_CDict* cdict,
- ZSTD_CCtx_params params,
+ const ZSTD_CCtx_params* params,
unsigned long long pledgedSrcSize)
{
- DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params.cParams.windowLog);
+ DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params->cParams.windowLog);
/* compression parameters verification and optimization */
- FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) );
+ FORWARD_IF_ERROR( ZSTD_checkCParams(params->cParams) , "");
return ZSTD_compressBegin_internal(cctx,
dict, dictSize, dictContentType, dtlm,
cdict,
@@ -3333,21 +3144,21 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
ZSTD_parameters params, unsigned long long pledgedSrcSize)
{
ZSTD_CCtx_params const cctxParams =
- ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
+ ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, &params);
return ZSTD_compressBegin_advanced_internal(cctx,
dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast,
NULL /*cdict*/,
- cctxParams, pledgedSrcSize);
+ &cctxParams, pledgedSrcSize);
}
size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
{
- ZSTD_parameters const params = ZSTD_getParams(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
+ ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
ZSTD_CCtx_params const cctxParams =
- ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
+ ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, &params);
DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize);
return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
- cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered);
+ &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered);
}
size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel)
@@ -3370,8 +3181,8 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
/* special case : empty frame */
if (cctx->stage == ZSTDcs_init) {
- fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams, 0, 0);
- FORWARD_IF_ERROR(fhSize);
+ fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0);
+ FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed");
dstCapacity -= fhSize;
op += fhSize;
cctx->stage = ZSTDcs_ongoing;
@@ -3380,7 +3191,7 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
if (cctx->stage != ZSTDcs_ending) {
/* write one last empty block, make it the "last" block */
U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0;
- RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall);
+ RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for epilogue");
MEM_writeLE32(op, cBlockHeader24);
op += ZSTD_blockHeaderSize;
dstCapacity -= ZSTD_blockHeaderSize;
@@ -3388,7 +3199,7 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
if (cctx->appliedParams.fParams.checksumFlag) {
U32 const checksum = (U32) XXH64_digest(&cctx->xxhState);
- RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall);
+ RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum");
DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", (unsigned)checksum);
MEM_writeLE32(op, checksum);
op += 4;
@@ -3406,9 +3217,9 @@ size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
size_t const cSize = ZSTD_compressContinue_internal(cctx,
dst, dstCapacity, src, srcSize,
1 /* frame mode */, 1 /* last chunk */);
- FORWARD_IF_ERROR(cSize);
+ FORWARD_IF_ERROR(cSize, "ZSTD_compressContinue_internal failed");
endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize);
- FORWARD_IF_ERROR(endResult);
+ FORWARD_IF_ERROR(endResult, "ZSTD_writeEpilogue failed");
assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0));
if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */
ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1);
@@ -3428,16 +3239,16 @@ static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
const void* dict,size_t dictSize,
- ZSTD_parameters params)
+ const ZSTD_parameters* params)
{
ZSTD_CCtx_params const cctxParams =
- ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
+ ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params);
DEBUGLOG(4, "ZSTD_compress_internal");
return ZSTD_compress_advanced_internal(cctx,
dst, dstCapacity,
src, srcSize,
dict, dictSize,
- cctxParams);
+ &cctxParams);
}
size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx,
@@ -3447,12 +3258,12 @@ size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx,
ZSTD_parameters params)
{
DEBUGLOG(4, "ZSTD_compress_advanced");
- FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams));
+ FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), "");
return ZSTD_compress_internal(cctx,
dst, dstCapacity,
src, srcSize,
dict, dictSize,
- params);
+ &params);
}
/* Internal */
@@ -3461,12 +3272,12 @@ size_t ZSTD_compress_advanced_internal(
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
const void* dict,size_t dictSize,
- ZSTD_CCtx_params params)
+ const ZSTD_CCtx_params* params)
{
DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (unsigned)srcSize);
FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
- params, srcSize, ZSTDb_not_buffered) );
+ params, srcSize, ZSTDb_not_buffered) , "");
return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
}
@@ -3476,10 +3287,11 @@ size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx,
const void* dict, size_t dictSize,
int compressionLevel)
{
- ZSTD_parameters const params = ZSTD_getParams(compressionLevel, srcSize + (!srcSize), dict ? dictSize : 0);
- ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
+ ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0);
+ ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, &params);
+ DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)", (unsigned)srcSize);
assert(params.fParams.contentSizeFlag == 1);
- return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, cctxParams);
+ return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctxParams);
}
size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
@@ -3514,13 +3326,16 @@ size_t ZSTD_estimateCDictSize_advanced(
ZSTD_dictLoadMethod_e dictLoadMethod)
{
DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict));
- return sizeof(ZSTD_CDict) + HUF_WORKSPACE_SIZE + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0)
- + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
+ return ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
+ + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
+ + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0)
+ + (dictLoadMethod == ZSTD_dlm_byRef ? 0
+ : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *))));
}
size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel)
{
- ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
+ ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
return ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy);
}
@@ -3528,7 +3343,9 @@ size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict)
{
if (cdict==NULL) return 0; /* support sizeof on NULL */
DEBUGLOG(5, "sizeof(*cdict) : %u", (unsigned)sizeof(*cdict));
- return cdict->workspaceSize + (cdict->dictBuffer ? cdict->dictContentSize : 0) + sizeof(*cdict);
+ /* cdict may be in the workspace */
+ return (cdict->workspace.workspace == cdict ? 0 : sizeof(*cdict))
+ + ZSTD_cwksp_sizeof(&cdict->workspace);
}
static size_t ZSTD_initCDict_internal(
@@ -3542,28 +3359,29 @@ static size_t ZSTD_initCDict_internal(
assert(!ZSTD_checkCParams(cParams));
cdict->matchState.cParams = cParams;
if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) {
- cdict->dictBuffer = NULL;
cdict->dictContent = dictBuffer;
} else {
- void* const internalBuffer = ZSTD_malloc(dictSize, cdict->customMem);
- cdict->dictBuffer = internalBuffer;
+ void *internalBuffer = ZSTD_cwksp_reserve_object(&cdict->workspace, ZSTD_cwksp_align(dictSize, sizeof(void*)));
+ RETURN_ERROR_IF(!internalBuffer, memory_allocation, "NULL pointer!");
cdict->dictContent = internalBuffer;
- RETURN_ERROR_IF(!internalBuffer, memory_allocation);
memcpy(internalBuffer, dictBuffer, dictSize);
}
cdict->dictContentSize = dictSize;
+ cdict->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cdict->workspace, HUF_WORKSPACE_SIZE);
+
+
/* Reset the state to no dictionary */
ZSTD_reset_compressedBlockState(&cdict->cBlockState);
- { void* const end = ZSTD_reset_matchState(
- &cdict->matchState,
- (U32*)cdict->workspace + HUF_WORKSPACE_SIZE_U32,
- &cParams, ZSTDcrp_continue, /* forCCtx */ 0);
- assert(end == (char*)cdict->workspace + cdict->workspaceSize);
- (void)end;
- }
+ FORWARD_IF_ERROR(ZSTD_reset_matchState(
+ &cdict->matchState,
+ &cdict->workspace,
+ &cParams,
+ ZSTDcrp_makeClean,
+ ZSTDirp_reset,
+ ZSTD_resetTarget_CDict), "");
/* (Maybe) load the dictionary
- * Skips loading the dictionary if it is <= 8 bytes.
+ * Skips loading the dictionary if it is < 8 bytes.
*/
{ ZSTD_CCtx_params params;
memset(&params, 0, sizeof(params));
@@ -3571,10 +3389,10 @@ static size_t ZSTD_initCDict_internal(
params.fParams.contentSizeFlag = 1;
params.cParams = cParams;
{ size_t const dictID = ZSTD_compress_insertDictionary(
- &cdict->cBlockState, &cdict->matchState, &params,
- cdict->dictContent, cdict->dictContentSize,
- dictContentType, ZSTD_dtlm_full, cdict->workspace);
- FORWARD_IF_ERROR(dictID);
+ &cdict->cBlockState, &cdict->matchState, NULL, &cdict->workspace,
+ &params, cdict->dictContent, cdict->dictContentSize,
+ dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace);
+ FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
assert(dictID <= (size_t)(U32)-1);
cdict->dictID = (U32)dictID;
}
@@ -3591,18 +3409,29 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize,
DEBUGLOG(3, "ZSTD_createCDict_advanced, mode %u", (unsigned)dictContentType);
if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
- { ZSTD_CDict* const cdict = (ZSTD_CDict*)ZSTD_malloc(sizeof(ZSTD_CDict), customMem);
- size_t const workspaceSize = HUF_WORKSPACE_SIZE + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0);
+ { size_t const workspaceSize =
+ ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) +
+ ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) +
+ ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) +
+ (dictLoadMethod == ZSTD_dlm_byRef ? 0
+ : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))));
void* const workspace = ZSTD_malloc(workspaceSize, customMem);
+ ZSTD_cwksp ws;
+ ZSTD_CDict* cdict;
- if (!cdict || !workspace) {
- ZSTD_free(cdict, customMem);
+ if (!workspace) {
ZSTD_free(workspace, customMem);
return NULL;
}
+
+ ZSTD_cwksp_init(&ws, workspace, workspaceSize);
+
+ cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict));
+ assert(cdict != NULL);
+ ZSTD_cwksp_move(&cdict->workspace, &ws);
cdict->customMem = customMem;
- cdict->workspace = workspace;
- cdict->workspaceSize = workspaceSize;
+ cdict->compressionLevel = 0; /* signals advanced API usage */
+
if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
dictBuffer, dictSize,
dictLoadMethod, dictContentType,
@@ -3617,15 +3446,18 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize,
ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel)
{
- ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
- return ZSTD_createCDict_advanced(dict, dictSize,
- ZSTD_dlm_byCopy, ZSTD_dct_auto,
- cParams, ZSTD_defaultCMem);
+ ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
+ ZSTD_CDict* cdict = ZSTD_createCDict_advanced(dict, dictSize,
+ ZSTD_dlm_byCopy, ZSTD_dct_auto,
+ cParams, ZSTD_defaultCMem);
+ if (cdict)
+ cdict->compressionLevel = compressionLevel == 0 ? ZSTD_CLEVEL_DEFAULT : compressionLevel;
+ return cdict;
}
ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel)
{
- ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
+ ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
return ZSTD_createCDict_advanced(dict, dictSize,
ZSTD_dlm_byRef, ZSTD_dct_auto,
cParams, ZSTD_defaultCMem);
@@ -3635,9 +3467,11 @@ size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
{
if (cdict==NULL) return 0; /* support free on NULL */
{ ZSTD_customMem const cMem = cdict->customMem;
- ZSTD_free(cdict->workspace, cMem);
- ZSTD_free(cdict->dictBuffer, cMem);
- ZSTD_free(cdict, cMem);
+ int cdictInWorkspace = ZSTD_cwksp_owns_buffer(&cdict->workspace, cdict);
+ ZSTD_cwksp_free(&cdict->workspace, cMem);
+ if (!cdictInWorkspace) {
+ ZSTD_free(cdict, cMem);
+ }
return 0;
}
}
@@ -3663,28 +3497,30 @@ const ZSTD_CDict* ZSTD_initStaticCDict(
ZSTD_compressionParameters cParams)
{
size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0);
- size_t const neededSize = sizeof(ZSTD_CDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize)
- + HUF_WORKSPACE_SIZE + matchStateSize;
- ZSTD_CDict* const cdict = (ZSTD_CDict*) workspace;
- void* ptr;
+ size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
+ + (dictLoadMethod == ZSTD_dlm_byRef ? 0
+ : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))))
+ + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
+ + matchStateSize;
+ ZSTD_CDict* cdict;
+
if ((size_t)workspace & 7) return NULL; /* 8-aligned */
+
+ {
+ ZSTD_cwksp ws;
+ ZSTD_cwksp_init(&ws, workspace, workspaceSize);
+ cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict));
+ if (cdict == NULL) return NULL;
+ ZSTD_cwksp_move(&cdict->workspace, &ws);
+ }
+
DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u",
(unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize));
if (workspaceSize < neededSize) return NULL;
- if (dictLoadMethod == ZSTD_dlm_byCopy) {
- memcpy(cdict+1, dict, dictSize);
- dict = cdict+1;
- ptr = (char*)workspace + sizeof(ZSTD_CDict) + dictSize;
- } else {
- ptr = cdict+1;
- }
- cdict->workspace = ptr;
- cdict->workspaceSize = HUF_WORKSPACE_SIZE + matchStateSize;
-
if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
dict, dictSize,
- ZSTD_dlm_byRef, dictContentType,
+ dictLoadMethod, dictContentType,
cParams) ))
return NULL;
@@ -3704,9 +3540,17 @@ size_t ZSTD_compressBegin_usingCDict_advanced(
ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)
{
DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced");
- RETURN_ERROR_IF(cdict==NULL, dictionary_wrong);
+ RETURN_ERROR_IF(cdict==NULL, dictionary_wrong, "NULL pointer!");
{ ZSTD_CCtx_params params = cctx->requestedParams;
- params.cParams = ZSTD_getCParamsFromCDict(cdict);
+ params.cParams = ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF
+ || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER
+ || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
+ || cdict->compressionLevel == 0 )
+ && (params.attachDictPref != ZSTD_dictForceLoad) ?
+ ZSTD_getCParamsFromCDict(cdict)
+ : ZSTD_getCParams(cdict->compressionLevel,
+ pledgedSrcSize,
+ cdict->dictContentSize);
/* Increase window log to fit the entire dictionary and source if the
* source size is known. Limit the increase to 19, which is the
* window log for compression level 1 with the largest source size.
@@ -3720,7 +3564,7 @@ size_t ZSTD_compressBegin_usingCDict_advanced(
return ZSTD_compressBegin_internal(cctx,
NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast,
cdict,
- params, pledgedSrcSize,
+ &params, pledgedSrcSize,
ZSTDb_not_buffered);
}
}
@@ -3740,7 +3584,7 @@ size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
const void* src, size_t srcSize,
const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)
{
- FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize)); /* will check if cdict != NULL */
+ FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize), ""); /* will check if cdict != NULL */
return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
}
@@ -3811,8 +3655,8 @@ static size_t ZSTD_resetCStream_internal(ZSTD_CStream* cctx,
FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
dict, dictSize, dictContentType, ZSTD_dtlm_fast,
cdict,
- params, pledgedSrcSize,
- ZSTDb_buffered) );
+ &params, pledgedSrcSize,
+ ZSTDb_buffered) , "");
cctx->inToCompress = 0;
cctx->inBuffPos = 0;
@@ -3834,8 +3678,8 @@ size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pss)
*/
U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss;
DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (unsigned)pledgedSrcSize);
- FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) );
- FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) );
+ FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+ FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
return 0;
}
@@ -3845,19 +3689,20 @@ size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pss)
* Assumption 2 : either dict, or cdict, is defined, not both */
size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
const void* dict, size_t dictSize, const ZSTD_CDict* cdict,
- ZSTD_CCtx_params params, unsigned long long pledgedSrcSize)
+ const ZSTD_CCtx_params* params,
+ unsigned long long pledgedSrcSize)
{
DEBUGLOG(4, "ZSTD_initCStream_internal");
- FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) );
- FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) );
- assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
- zcs->requestedParams = params;
+ FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+ FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
+ assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
+ zcs->requestedParams = *params;
assert(!((dict) && (cdict))); /* either dict or cdict, not both */
if (dict) {
- FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) );
+ FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "");
} else {
/* Dictionary is cleared if !cdict */
- FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) );
+ FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , "");
}
return 0;
}
@@ -3870,10 +3715,10 @@ size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
unsigned long long pledgedSrcSize)
{
DEBUGLOG(4, "ZSTD_initCStream_usingCDict_advanced");
- FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) );
- FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) );
+ FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+ FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
zcs->requestedParams.fParams = fParams;
- FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) );
+ FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , "");
return 0;
}
@@ -3881,8 +3726,8 @@ size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict)
{
DEBUGLOG(4, "ZSTD_initCStream_usingCDict");
- FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) );
- FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) );
+ FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+ FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , "");
return 0;
}
@@ -3890,7 +3735,7 @@ size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict)
/* ZSTD_initCStream_advanced() :
* pledgedSrcSize must be exact.
* if srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN.
- * dict is loaded with default parameters ZSTD_dm_auto and ZSTD_dlm_byCopy. */
+ * dict is loaded with default parameters ZSTD_dct_auto and ZSTD_dlm_byCopy. */
size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
const void* dict, size_t dictSize,
ZSTD_parameters params, unsigned long long pss)
@@ -3901,20 +3746,20 @@ size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
*/
U64 const pledgedSrcSize = (pss==0 && params.fParams.contentSizeFlag==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss;
DEBUGLOG(4, "ZSTD_initCStream_advanced");
- FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) );
- FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) );
- FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) );
- zcs->requestedParams = ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params);
- FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) );
+ FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+ FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
+ FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , "");
+ zcs->requestedParams = ZSTD_assignParamsToCCtxParams(&zcs->requestedParams, &params);
+ FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "");
return 0;
}
size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel)
{
DEBUGLOG(4, "ZSTD_initCStream_usingDict");
- FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) );
- FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) );
- FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) );
+ FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+ FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , "");
+ FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "");
return 0;
}
@@ -3926,19 +3771,19 @@ size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigne
*/
U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss;
DEBUGLOG(4, "ZSTD_initCStream_srcSize");
- FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) );
- FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) );
- FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) );
- FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) );
+ FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+ FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , "");
+ FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , "");
+ FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
return 0;
}
size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel)
{
DEBUGLOG(4, "ZSTD_initCStream");
- FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) );
- FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) );
- FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) );
+ FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+ FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , "");
+ FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , "");
return 0;
}
@@ -3951,14 +3796,6 @@ static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx)
return hintInSize;
}
-static size_t ZSTD_limitCopy(void* dst, size_t dstCapacity,
- const void* src, size_t srcSize)
-{
- size_t const length = MIN(dstCapacity, srcSize);
- if (length) memcpy(dst, src, length);
- return length;
-}
-
/** ZSTD_compressStream_generic():
* internal function for all *compressStream*() variants
* non-static, because can be called from zstdmt_compress.c
@@ -3969,11 +3806,11 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
ZSTD_EndDirective const flushMode)
{
const char* const istart = (const char*)input->src;
- const char* const iend = istart + input->size;
- const char* ip = istart + input->pos;
+ const char* const iend = input->size != 0 ? istart + input->size : istart;
+ const char* ip = input->pos != 0 ? istart + input->pos : istart;
char* const ostart = (char*)output->dst;
- char* const oend = ostart + output->size;
- char* op = ostart + output->pos;
+ char* const oend = output->size != 0 ? ostart + output->size : ostart;
+ char* op = output->pos != 0 ? ostart + output->pos : ostart;
U32 someMoreWork = 1;
/* check expectations */
@@ -3999,7 +3836,7 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
size_t const cSize = ZSTD_compressEnd(zcs,
op, oend-op, ip, iend-ip);
DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize);
- FORWARD_IF_ERROR(cSize);
+ FORWARD_IF_ERROR(cSize, "ZSTD_compressEnd failed");
ip = iend;
op += cSize;
zcs->frameEnded = 1;
@@ -4012,7 +3849,8 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
zcs->inBuff + zcs->inBuffPos, toLoad,
ip, iend-ip);
zcs->inBuffPos += loaded;
- ip += loaded;
+ if (loaded != 0)
+ ip += loaded;
if ( (flushMode == ZSTD_e_continue)
&& (zcs->inBuffPos < zcs->inBuffTarget) ) {
/* not enough input to fill full block : stop here */
@@ -4040,7 +3878,7 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
zcs->inBuff + zcs->inToCompress, iSize) :
ZSTD_compressContinue(zcs, cDst, oSize,
zcs->inBuff + zcs->inToCompress, iSize);
- FORWARD_IF_ERROR(cSize);
+ FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");
zcs->frameEnded = lastBlock;
/* prepare next block */
zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize;
@@ -4068,11 +3906,12 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
case zcss_flush:
DEBUGLOG(5, "flush stage");
{ size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
- size_t const flushed = ZSTD_limitCopy(op, oend-op,
+ size_t const flushed = ZSTD_limitCopy(op, (size_t)(oend-op),
zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u",
(unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed);
- op += flushed;
+ if (flushed)
+ op += flushed;
zcs->outBuffFlushedSize += flushed;
if (toFlush!=flushed) {
/* flush not fully completed, presumably because dst is too small */
@@ -4116,7 +3955,7 @@ static size_t ZSTD_nextInputSizeHint_MTorST(const ZSTD_CCtx* cctx)
size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
{
- FORWARD_IF_ERROR( ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue) );
+ FORWARD_IF_ERROR( ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue) , "");
return ZSTD_nextInputSizeHint_MTorST(zcs);
}
@@ -4128,15 +3967,15 @@ size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
{
DEBUGLOG(5, "ZSTD_compressStream2, endOp=%u ", (unsigned)endOp);
/* check conditions */
- RETURN_ERROR_IF(output->pos > output->size, GENERIC);
- RETURN_ERROR_IF(input->pos > input->size, GENERIC);
+ RETURN_ERROR_IF(output->pos > output->size, GENERIC, "invalid buffer");
+ RETURN_ERROR_IF(input->pos > input->size, GENERIC, "invalid buffer");
assert(cctx!=NULL);
/* transparent initialization stage */
if (cctx->streamStage == zcss_init) {
ZSTD_CCtx_params params = cctx->requestedParams;
ZSTD_prefixDict const prefixDict = cctx->prefixDict;
- FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) ); /* Init the local dict if present. */
+ FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */
memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */
assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */
DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage");
@@ -4154,15 +3993,15 @@ size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
if (cctx->mtctx == NULL) {
DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u",
params.nbWorkers);
- cctx->mtctx = ZSTDMT_createCCtx_advanced(params.nbWorkers, cctx->customMem);
- RETURN_ERROR_IF(cctx->mtctx == NULL, memory_allocation);
+ cctx->mtctx = ZSTDMT_createCCtx_advanced((U32)params.nbWorkers, cctx->customMem);
+ RETURN_ERROR_IF(cctx->mtctx == NULL, memory_allocation, "NULL pointer!");
}
/* mt compression */
DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbWorkers=%u", params.nbWorkers);
FORWARD_IF_ERROR( ZSTDMT_initCStream_internal(
cctx->mtctx,
- prefixDict.dict, prefixDict.dictSize, ZSTD_dct_rawContent,
- cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) );
+ prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType,
+ cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) , "");
cctx->streamStage = zcss_load;
cctx->appliedParams.nbWorkers = params.nbWorkers;
} else
@@ -4170,7 +4009,7 @@ size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
{ FORWARD_IF_ERROR( ZSTD_resetCStream_internal(cctx,
prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType,
cctx->cdict,
- params, cctx->pledgedSrcSizePlusOne-1) );
+ params, cctx->pledgedSrcSizePlusOne-1) , "");
assert(cctx->streamStage == zcss_load);
assert(cctx->appliedParams.nbWorkers == 0);
} }
@@ -4192,7 +4031,7 @@ size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
|| (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */
ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
}
- FORWARD_IF_ERROR(flushMin);
+ FORWARD_IF_ERROR(flushMin, "ZSTDMT_compressStream_generic failed");
} while (forceMaxProgress && flushMin != 0 && output->pos < output->size);
DEBUGLOG(5, "completed ZSTD_compressStream2 delegating to ZSTDMT_compressStream_generic");
/* Either we don't require maximum forward progress, we've finished the
@@ -4202,7 +4041,7 @@ size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
return flushMin;
}
#endif
- FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) );
+ FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) , "");
DEBUGLOG(5, "completed ZSTD_compressStream2");
return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */
}
@@ -4226,6 +4065,7 @@ size_t ZSTD_compress2(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize)
{
+ DEBUGLOG(4, "ZSTD_compress2 (srcSize=%u)", (unsigned)srcSize);
ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
{ size_t oPos = 0;
size_t iPos = 0;
@@ -4233,10 +4073,10 @@ size_t ZSTD_compress2(ZSTD_CCtx* cctx,
dst, dstCapacity, &oPos,
src, srcSize, &iPos,
ZSTD_e_end);
- FORWARD_IF_ERROR(result);
+ FORWARD_IF_ERROR(result, "ZSTD_compressStream2_simpleArgs failed");
if (result != 0) { /* compression not completed, due to lack of output space */
assert(oPos == dstCapacity);
- RETURN_ERROR(dstSize_tooSmall);
+ RETURN_ERROR(dstSize_tooSmall, "");
}
assert(iPos == srcSize); /* all input is expected consumed */
return oPos;
@@ -4258,11 +4098,11 @@ size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
{
ZSTD_inBuffer input = { NULL, 0, 0 };
size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end);
- FORWARD_IF_ERROR( remainingToFlush );
+ FORWARD_IF_ERROR( remainingToFlush , "ZSTD_compressStream2 failed");
if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush; /* minimal estimation */
/* single thread mode : attempt to calculate remaining to flush more precisely */
{ size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE;
- size_t const checksumSize = zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4;
+ size_t const checksumSize = (size_t)(zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4);
size_t const toFlush = remainingToFlush + lastBlockSize + checksumSize;
DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush);
return toFlush;
@@ -4282,8 +4122,8 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV
{ 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */
{ 19, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */
{ 20, 15, 16, 1, 6, 0, ZSTD_fast }, /* level 2 */
- { 21, 16, 17, 1, 5, 1, ZSTD_dfast }, /* level 3 */
- { 21, 18, 18, 1, 5, 1, ZSTD_dfast }, /* level 4 */
+ { 21, 16, 17, 1, 5, 0, ZSTD_dfast }, /* level 3 */
+ { 21, 18, 18, 1, 5, 0, ZSTD_dfast }, /* level 4 */
{ 21, 18, 19, 2, 5, 2, ZSTD_greedy }, /* level 5 */
{ 21, 19, 19, 3, 5, 4, ZSTD_greedy }, /* level 6 */
{ 21, 19, 19, 3, 5, 8, ZSTD_lazy }, /* level 7 */
@@ -4307,8 +4147,8 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV
/* W, C, H, S, L, T, strat */
{ 18, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
{ 18, 13, 14, 1, 6, 0, ZSTD_fast }, /* level 1 */
- { 18, 14, 14, 1, 5, 1, ZSTD_dfast }, /* level 2 */
- { 18, 16, 16, 1, 4, 1, ZSTD_dfast }, /* level 3 */
+ { 18, 14, 14, 1, 5, 0, ZSTD_dfast }, /* level 2 */
+ { 18, 16, 16, 1, 4, 0, ZSTD_dfast }, /* level 3 */
{ 18, 16, 17, 2, 5, 2, ZSTD_greedy }, /* level 4.*/
{ 18, 18, 18, 3, 5, 2, ZSTD_greedy }, /* level 5.*/
{ 18, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6.*/
@@ -4334,8 +4174,8 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV
{ 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
{ 17, 12, 13, 1, 6, 0, ZSTD_fast }, /* level 1 */
{ 17, 13, 15, 1, 5, 0, ZSTD_fast }, /* level 2 */
- { 17, 15, 16, 2, 5, 1, ZSTD_dfast }, /* level 3 */
- { 17, 17, 17, 2, 4, 1, ZSTD_dfast }, /* level 4 */
+ { 17, 15, 16, 2, 5, 0, ZSTD_dfast }, /* level 3 */
+ { 17, 17, 17, 2, 4, 0, ZSTD_dfast }, /* level 4 */
{ 17, 16, 17, 3, 4, 2, ZSTD_greedy }, /* level 5 */
{ 17, 17, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */
{ 17, 17, 17, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */
@@ -4360,7 +4200,7 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV
{ 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
{ 14, 14, 15, 1, 5, 0, ZSTD_fast }, /* level 1 */
{ 14, 14, 15, 1, 4, 0, ZSTD_fast }, /* level 2 */
- { 14, 14, 15, 2, 4, 1, ZSTD_dfast }, /* level 3 */
+ { 14, 14, 15, 2, 4, 0, ZSTD_dfast }, /* level 3 */
{ 14, 14, 14, 4, 4, 2, ZSTD_greedy }, /* level 4 */
{ 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 5.*/
{ 14, 14, 14, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */
@@ -4383,35 +4223,56 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV
},
};
-/*! ZSTD_getCParams() :
+/*! ZSTD_getCParams_internal() :
* @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.
- * Size values are optional, provide 0 if not known or unused */
-ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize)
+ * Note: srcSizeHint 0 means 0, use ZSTD_CONTENTSIZE_UNKNOWN for unknown.
+ * Use dictSize == 0 for unknown or unused. */
+static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize)
{
- size_t const addedSize = srcSizeHint ? 0 : 500;
- U64 const rSize = srcSizeHint+dictSize ? srcSizeHint+dictSize+addedSize : ZSTD_CONTENTSIZE_UNKNOWN; /* intentional overflow for srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN */
+ int const unknown = srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN;
+ size_t const addedSize = unknown && dictSize > 0 ? 500 : 0;
+ U64 const rSize = unknown && dictSize == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : srcSizeHint+dictSize+addedSize;
U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB);
int row = compressionLevel;
- DEBUGLOG(5, "ZSTD_getCParams (cLevel=%i)", compressionLevel);
+ DEBUGLOG(5, "ZSTD_getCParams_internal (cLevel=%i)", compressionLevel);
if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT; /* 0 == default */
if (compressionLevel < 0) row = 0; /* entry 0 is baseline for fast mode */
if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL;
{ ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row];
if (compressionLevel < 0) cp.targetLength = (unsigned)(-compressionLevel); /* acceleration factor */
- return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize); /* refine parameters based on srcSize & dictSize */
+ /* refine parameters based on srcSize & dictSize */
+ return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize);
}
}
+/*! ZSTD_getCParams() :
+ * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.
+ * Size values are optional, provide 0 if not known or unused */
+ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize)
+{
+ if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN;
+ return ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize);
+}
+
/*! ZSTD_getParams() :
* same idea as ZSTD_getCParams()
* @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`).
* Fields of `ZSTD_frameParameters` are set to default values */
-ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) {
+static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) {
ZSTD_parameters params;
- ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSizeHint, dictSize);
+ ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize);
DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel);
memset(&params, 0, sizeof(params));
params.cParams = cParams;
params.fParams.contentSizeFlag = 1;
return params;
}
+
+/*! ZSTD_getParams() :
+ * same idea as ZSTD_getCParams()
+ * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`).
+ * Fields of `ZSTD_frameParameters` are set to default values */
+ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) {
+ if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN;
+ return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize);
+}
diff --git a/zstd/lib/compress/zstd_compress_internal.h b/zstd/lib/compress/zstd_compress_internal.h
index cc3cbb9..db73f6c 100644
--- a/zstd/lib/compress/zstd_compress_internal.h
+++ b/zstd/lib/compress/zstd_compress_internal.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -18,7 +18,8 @@
/*-*************************************
* Dependencies
***************************************/
-#include "zstd_internal.h"
+#include "../common/zstd_internal.h"
+#include "zstd_cwksp.h"
#ifdef ZSTD_MULTITHREAD
# include "zstdmt_compress.h"
#endif
@@ -33,13 +34,13 @@ extern "C" {
***************************************/
#define kSearchStrength 8
#define HASH_READ_SIZE 8
-#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index 1 now means "unsorted".
+#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted".
It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
It's not a big deal though : candidate will just be sorted again.
Additionally, candidate position 1 will be lost.
But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
- The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy
- Constant required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
+ The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy.
+ This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
/*-*************************************
@@ -128,21 +129,26 @@ typedef struct {
BYTE const* base; /* All regular indexes relative to this position */
BYTE const* dictBase; /* extDict indexes relative to this position */
U32 dictLimit; /* below that point, need extDict */
- U32 lowLimit; /* below that point, no more data */
+ U32 lowLimit; /* below that point, no more valid data */
} ZSTD_window_t;
typedef struct ZSTD_matchState_t ZSTD_matchState_t;
struct ZSTD_matchState_t {
ZSTD_window_t window; /* State for window round buffer management */
- U32 loadedDictEnd; /* index of end of dictionary */
+ U32 loadedDictEnd; /* index of end of dictionary, within context's referential.
+ * When loadedDictEnd != 0, a dictionary is in use, and still valid.
+ * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance.
+ * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity().
+ * When dict referential is copied into active context (i.e. not attached),
+ * loadedDictEnd == dictSize, since referential starts from zero.
+ */
U32 nextToUpdate; /* index from which to continue table update */
- U32 nextToUpdate3; /* index from which to continue table update */
- U32 hashLog3; /* dispatch table : larger == faster, more memory */
+ U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */
U32* hashTable;
U32* hashTable3;
U32* chainTable;
optState_t opt; /* optimal parser state */
- const ZSTD_matchState_t * dictMatchState;
+ const ZSTD_matchState_t* dictMatchState;
ZSTD_compressionParameters cParams;
};
@@ -160,6 +166,7 @@ typedef struct {
typedef struct {
ZSTD_window_t window; /* State for the window round buffer management */
ldmEntry_t* hashTable;
+ U32 loadedDictEnd;
BYTE* bucketOffsets; /* Next position in bucket to insert entry */
U64 hashPower; /* Used to compute the rolling hash.
* Depends on ldmParams.minMatchLength */
@@ -187,6 +194,13 @@ typedef struct {
size_t capacity; /* The capacity starting from `seq` pointer */
} rawSeqStore_t;
+typedef struct {
+ int collectSequences;
+ ZSTD_Sequence* seqStart;
+ size_t seqIndex;
+ size_t maxSequences;
+} SeqCollector;
+
struct ZSTD_CCtx_params_s {
ZSTD_format_e format;
ZSTD_compressionParameters cParams;
@@ -195,6 +209,12 @@ struct ZSTD_CCtx_params_s {
int compressionLevel;
int forceWindow; /* force back-references to respect limit of
* 1<<wLog, even for dictionary */
+ size_t targetCBlockSize; /* Tries to fit compressed block size to be around targetCBlockSize.
+ * No target when targetCBlockSize == 0.
+ * There is no guarantee on compressed block size */
+ int srcSizeHint; /* User's best guess of source size.
+ * Hint is not valid when srcSizeHint == 0.
+ * There is no guarantee that hint is close to actual source size */
ZSTD_dictAttachPref_e attachDictPref;
ZSTD_literalCompressionMode_e literalCompressionMode;
@@ -220,9 +240,7 @@ struct ZSTD_CCtx_s {
ZSTD_CCtx_params appliedParams;
U32 dictID;
- int workSpaceOversizedDuration;
- void* workSpace;
- size_t workSpaceSize;
+ ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
size_t blockSize;
unsigned long long pledgedSrcSizePlusOne; /* this way, 0 (default) == unknown */
unsigned long long consumedSrcSize;
@@ -230,6 +248,9 @@ struct ZSTD_CCtx_s {
XXH64_state_t xxhState;
ZSTD_customMem customMem;
size_t staticSize;
+ SeqCollector seqCollector;
+ int isFirstBlock;
+ int initialized;
seqStore_t seqStore; /* sequences storage ptrs */
ldmState_t ldmState; /* long distance matching state */
@@ -305,26 +326,145 @@ MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
}
+typedef struct repcodes_s {
+ U32 rep[3];
+} repcodes_t;
+
+MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
+{
+ repcodes_t newReps;
+ if (offset >= ZSTD_REP_NUM) { /* full offset */
+ newReps.rep[2] = rep[1];
+ newReps.rep[1] = rep[0];
+ newReps.rep[0] = offset - ZSTD_REP_MOVE;
+ } else { /* repcode */
+ U32 const repCode = offset + ll0;
+ if (repCode > 0) { /* note : if repCode==0, no change */
+ U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
+ newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
+ newReps.rep[1] = rep[0];
+ newReps.rep[0] = currentOffset;
+ } else { /* repCode == 0 */
+ memcpy(&newReps, rep, sizeof(newReps));
+ }
+ }
+ return newReps;
+}
+
+/* ZSTD_cParam_withinBounds:
+ * @return 1 if value is within cParam bounds,
+ * 0 otherwise */
+MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
+{
+ ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
+ if (ZSTD_isError(bounds.error)) return 0;
+ if (value < bounds.lowerBound) return 0;
+ if (value > bounds.upperBound) return 0;
+ return 1;
+}
+
+/* ZSTD_noCompressBlock() :
+ * Writes uncompressed block to dst buffer from given src.
+ * Returns the size of the block */
+MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
+{
+ U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
+ RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
+ dstSize_tooSmall, "dst buf too small for uncompressed block");
+ MEM_writeLE24(dst, cBlockHeader24);
+ memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
+ return ZSTD_blockHeaderSize + srcSize;
+}
+
+MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
+{
+ BYTE* const op = (BYTE*)dst;
+ U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3);
+ RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, "");
+ MEM_writeLE24(op, cBlockHeader);
+ op[3] = src;
+ return 4;
+}
+
+
+/* ZSTD_minGain() :
+ * minimum compression required
+ * to generate a compress block or a compressed literals section.
+ * note : use same formula for both situations */
+MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
+{
+ U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
+ ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
+ assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
+ return (srcSize >> minlog) + 2;
+}
+
+MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams)
+{
+ switch (cctxParams->literalCompressionMode) {
+ case ZSTD_lcm_huffman:
+ return 0;
+ case ZSTD_lcm_uncompressed:
+ return 1;
+ default:
+ assert(0 /* impossible: pre-validated */);
+ /* fall-through */
+ case ZSTD_lcm_auto:
+ return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
+ }
+}
+
+/*! ZSTD_safecopyLiterals() :
+ * memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
+ * Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
+ * large copies.
+ */
+static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) {
+ assert(iend > ilimit_w);
+ if (ip <= ilimit_w) {
+ ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
+ op += ilimit_w - ip;
+ ip = ilimit_w;
+ }
+ while (ip < iend) *op++ = *ip++;
+}
+
/*! ZSTD_storeSeq() :
- * Store a sequence (literal length, literals, offset code and match length code) into seqStore_t.
- * `offsetCode` : distance to match + 3 (values 1-3 are repCodes).
+ * Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t.
+ * `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes).
* `mlBase` : matchLength - MINMATCH
+ * Allowed to overread literals up to litLimit.
*/
-MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t mlBase)
+HINT_INLINE UNUSED_ATTR
+void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase)
{
+ BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
+ BYTE const* const litEnd = literals + litLength;
#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
static const BYTE* g_start = NULL;
if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */
{ U32 const pos = (U32)((const BYTE*)literals - g_start);
DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
- pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offsetCode);
+ pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode);
}
#endif
assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
/* copy Literals */
assert(seqStorePtr->maxNbLit <= 128 KB);
assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
- ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
+ assert(literals + litLength <= litLimit);
+ if (litEnd <= litLimit_w) {
+ /* Common case we can use wildcopy.
+ * First copy 16 bytes, because literals are likely short.
+ */
+ assert(WILDCOPY_OVERLENGTH >= 16);
+ ZSTD_copy16(seqStorePtr->lit, literals);
+ if (litLength > 16) {
+ ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
+ }
+ } else {
+ ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w);
+ }
seqStorePtr->lit += litLength;
/* literal Length */
@@ -336,7 +476,7 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const v
seqStorePtr->sequences[0].litLength = (U16)litLength;
/* match offset */
- seqStorePtr->sequences[0].offset = offsetCode + 1;
+ seqStorePtr->sequences[0].offset = offCode + 1;
/* match Length */
if (mlBase>0xFFFF) {
@@ -359,8 +499,7 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
if (MEM_64bits()) {
# if defined(_MSC_VER) && defined(_WIN64)
unsigned long r = 0;
- _BitScanForward64( &r, (U64)val );
- return (unsigned)(r>>3);
+ return _BitScanForward64( &r, (U64)val ) ? (unsigned)(r >> 3) : 0;
# elif defined(__GNUC__) && (__GNUC__ >= 4)
return (__builtin_ctzll((U64)val) >> 3);
# else
@@ -377,8 +516,7 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
} else { /* 32 bits */
# if defined(_MSC_VER)
unsigned long r=0;
- _BitScanForward( &r, (U32)val );
- return (unsigned)(r>>3);
+ return _BitScanForward( &r, (U32)val ) ? (unsigned)(r >> 3) : 0;
# elif defined(__GNUC__) && (__GNUC__ >= 3)
return (__builtin_ctz((U32)val) >> 3);
# else
@@ -393,8 +531,7 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
if (MEM_64bits()) {
# if defined(_MSC_VER) && defined(_WIN64)
unsigned long r = 0;
- _BitScanReverse64( &r, val );
- return (unsigned)(r>>3);
+ return _BitScanReverse64( &r, val ) ? (unsigned)(r >> 3) : 0;
# elif defined(__GNUC__) && (__GNUC__ >= 4)
return (__builtin_clzll(val) >> 3);
# else
@@ -408,8 +545,7 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
} else { /* 32 bits */
# if defined(_MSC_VER)
unsigned long r = 0;
- _BitScanReverse( &r, (unsigned long)val );
- return (unsigned)(r>>3);
+ return _BitScanReverse( &r, (unsigned long)val ) ? (unsigned)(r >> 3) : 0;
# elif defined(__GNUC__) && (__GNUC__ >= 3)
return (__builtin_clz((U32)val) >> 3);
# else
@@ -564,6 +700,9 @@ MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64
/*-*************************************
* Round buffer management
***************************************/
+#if (ZSTD_WINDOWLOG_MAX_64 > 31)
+# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX"
+#endif
/* Max current allowed */
#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
/* Maximum chunk size before overflow correction needs to be called again */
@@ -653,7 +792,10 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
*/
U32 const cycleMask = (1U << cycleLog) - 1;
U32 const current = (U32)((BYTE const*)src - window->base);
- U32 const newCurrent = (current & cycleMask) + maxDist;
+ U32 const currentCycle0 = current & cycleMask;
+ /* Exclude zero so that newCurrent - maxDist >= 1. */
+ U32 const currentCycle1 = currentCycle0 == 0 ? (1U << cycleLog) : currentCycle0;
+ U32 const newCurrent = currentCycle1 + maxDist;
U32 const correction = current - newCurrent;
assert((maxDist & cycleMask) == 0);
assert(current > newCurrent);
@@ -662,8 +804,17 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
window->base += correction;
window->dictBase += correction;
- window->lowLimit -= correction;
- window->dictLimit -= correction;
+ if (window->lowLimit <= correction) window->lowLimit = 1;
+ else window->lowLimit -= correction;
+ if (window->dictLimit <= correction) window->dictLimit = 1;
+ else window->dictLimit -= correction;
+
+ /* Ensure we can still reference the full window. */
+ assert(newCurrent >= maxDist);
+ assert(newCurrent - maxDist >= 1);
+ /* Ensure that lowLimit and dictLimit didn't underflow. */
+ assert(window->lowLimit <= newCurrent);
+ assert(window->dictLimit <= newCurrent);
DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
window->lowLimit);
@@ -675,31 +826,49 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
* Updates lowLimit so that:
* (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
*
- * This allows a simple check that index >= lowLimit to see if index is valid.
- * This must be called before a block compression call, with srcEnd as the block
- * source end.
+ * It ensures index is valid as long as index >= lowLimit.
+ * This must be called before a block compression call.
+ *
+ * loadedDictEnd is only defined if a dictionary is in use for current compression.
+ * As the name implies, loadedDictEnd represents the index at end of dictionary.
+ * The value lies within context's referential, it can be directly compared to blockEndIdx.
*
- * If loadedDictEndPtr is not NULL, we set it to zero once we update lowLimit.
- * This is because dictionaries are allowed to be referenced as long as the last
- * byte of the dictionary is in the window, but once they are out of range,
- * they cannot be referenced. If loadedDictEndPtr is NULL, we use
- * loadedDictEnd == 0.
+ * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
+ * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
+ * This is because dictionaries are allowed to be referenced fully
+ * as long as the last byte of the dictionary is in the window.
+ * Once input has progressed beyond window size, dictionary cannot be referenced anymore.
*
- * In normal dict mode, the dict is between lowLimit and dictLimit. In
- * dictMatchState mode, lowLimit and dictLimit are the same, and the dictionary
- * is below them. forceWindow and dictMatchState are therefore incompatible.
+ * In normal dict mode, the dictionary lies between lowLimit and dictLimit.
+ * In dictMatchState mode, lowLimit and dictLimit are the same,
+ * and the dictionary is below them.
+ * forceWindow and dictMatchState are therefore incompatible.
*/
MEM_STATIC void
ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
- void const* srcEnd,
- U32 maxDist,
- U32* loadedDictEndPtr,
+ const void* blockEnd,
+ U32 maxDist,
+ U32* loadedDictEndPtr,
const ZSTD_matchState_t** dictMatchStatePtr)
{
- U32 const blockEndIdx = (U32)((BYTE const*)srcEnd - window->base);
- U32 loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
- DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u",
- (unsigned)blockEndIdx, (unsigned)maxDist);
+ U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
+ U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
+ DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
+ (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
+
+ /* - When there is no dictionary : loadedDictEnd == 0.
+ In which case, the test (blockEndIdx > maxDist) is merely to avoid
+ overflowing next operation `newLowLimit = blockEndIdx - maxDist`.
+ - When there is a standard dictionary :
+ Index referential is copied from the dictionary,
+ which means it starts from 0.
+ In which case, loadedDictEnd == dictSize,
+ and it makes sense to compare `blockEndIdx > maxDist + dictSize`
+ since `blockEndIdx` also starts from zero.
+ - When there is an attached dictionary :
+ loadedDictEnd is expressed within the referential of the context,
+ so it can be directly compared against blockEndIdx.
+ */
if (blockEndIdx > maxDist + loadedDictEnd) {
U32 const newLowLimit = blockEndIdx - maxDist;
if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
@@ -708,11 +877,54 @@ ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
(unsigned)window->dictLimit, (unsigned)window->lowLimit);
window->dictLimit = window->lowLimit;
}
- if (loadedDictEndPtr)
+ /* On reaching window size, dictionaries are invalidated */
+ if (loadedDictEndPtr) *loadedDictEndPtr = 0;
+ if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
+ }
+}
+
+/* Similar to ZSTD_window_enforceMaxDist(),
+ * but only invalidates dictionary
+ * when input progresses beyond window size.
+ * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL)
+ * loadedDictEnd uses same referential as window->base
+ * maxDist is the window size */
+MEM_STATIC void
+ZSTD_checkDictValidity(const ZSTD_window_t* window,
+ const void* blockEnd,
+ U32 maxDist,
+ U32* loadedDictEndPtr,
+ const ZSTD_matchState_t** dictMatchStatePtr)
+{
+ assert(loadedDictEndPtr != NULL);
+ assert(dictMatchStatePtr != NULL);
+ { U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
+ U32 const loadedDictEnd = *loadedDictEndPtr;
+ DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
+ (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
+ assert(blockEndIdx >= loadedDictEnd);
+
+ if (blockEndIdx > loadedDictEnd + maxDist) {
+ /* On reaching window size, dictionaries are invalidated.
+ * For simplification, if window size is reached anywhere within next block,
+ * the dictionary is invalidated for the full block.
+ */
+ DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
*loadedDictEndPtr = 0;
- if (dictMatchStatePtr)
*dictMatchStatePtr = NULL;
- }
+ } else {
+ if (*loadedDictEndPtr != 0) {
+ DEBUGLOG(6, "dictionary considered valid for current block");
+ } } }
+}
+
+MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
+ memset(window, 0, sizeof(*window));
+ window->base = (BYTE const*)"";
+ window->dictBase = (BYTE const*)"";
+ window->dictLimit = 1; /* start from 1, so that 1st position is valid */
+ window->lowLimit = 1; /* it ensures first and later CCtx usages compress the same */
+ window->nextSrc = window->base + 1; /* see issue #1241 */
}
/**
@@ -728,6 +940,10 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
BYTE const* const ip = (BYTE const*)src;
U32 contiguous = 1;
DEBUGLOG(5, "ZSTD_window_update");
+ if (srcSize == 0)
+ return contiguous;
+ assert(window->base != NULL);
+ assert(window->dictBase != NULL);
/* Check if blocks follow each other */
if (src != window->nextSrc) {
/* not contiguous */
@@ -738,7 +954,7 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
window->dictLimit = (U32)distanceFromBase;
window->dictBase = window->base;
window->base = ip - distanceFromBase;
- // ms->nextToUpdate = window->dictLimit;
+ /* ms->nextToUpdate = window->dictLimit; */
if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit; /* too small extDict */
contiguous = 0;
}
@@ -754,6 +970,33 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
return contiguous;
}
+/**
+ * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix.
+ */
+MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 current, unsigned windowLog)
+{
+ U32 const maxDistance = 1U << windowLog;
+ U32 const lowestValid = ms->window.lowLimit;
+ U32 const withinWindow = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
+ U32 const isDictionary = (ms->loadedDictEnd != 0);
+ U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
+ return matchLowest;
+}
+
+/**
+ * Returns the lowest allowed match index in the prefix.
+ */
+MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 current, unsigned windowLog)
+{
+ U32 const maxDistance = 1U << windowLog;
+ U32 const lowestValid = ms->window.dictLimit;
+ U32 const withinWindow = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
+ U32 const isDictionary = (ms->loadedDictEnd != 0);
+ U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
+ return matchLowest;
+}
+
+
/* debug functions */
#if (DEBUGLEVEL>=2)
@@ -791,6 +1034,21 @@ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
}
#endif
+/* ===============================================================
+ * Shared internal declarations
+ * These prototypes may be called from sources not in lib/compress
+ * =============================================================== */
+
+/* ZSTD_loadCEntropy() :
+ * dict : must point at beginning of a valid zstd dictionary.
+ * return : size of dictionary header (size of magic number + dict ID + entropy tables)
+ * assumptions : magic number supposed already checked
+ * and dictSize >= 8 */
+size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
+ short* offcodeNCount, unsigned* offcodeMaxValue,
+ const void* const dict, size_t dictSize);
+
+void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs);
/* ==============================================================
* Private declarations
@@ -800,6 +1058,7 @@ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
/* ZSTD_getCParamsFromCCtxParams() :
* cParams are built depending on compressionLevel, src size hints,
* LDM and manually set compression parameters.
+ * Note: srcSizeHint == 0 means 0!
*/
ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize);
@@ -812,7 +1071,7 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
const void* dict, size_t dictSize,
const ZSTD_CDict* cdict,
- ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);
+ const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize);
void ZSTD_resetSeqStore(seqStore_t* ssPtr);
@@ -827,7 +1086,7 @@ size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
ZSTD_dictContentType_e dictContentType,
ZSTD_dictTableLoadMethod_e dtlm,
const ZSTD_CDict* cdict,
- ZSTD_CCtx_params params,
+ const ZSTD_CCtx_params* params,
unsigned long long pledgedSrcSize);
/* ZSTD_compress_advanced_internal() :
@@ -836,7 +1095,7 @@ size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
const void* dict,size_t dictSize,
- ZSTD_CCtx_params params);
+ const ZSTD_CCtx_params* params);
/* ZSTD_writeLastEmptyBlock() :
@@ -859,5 +1118,8 @@ size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
*/
size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
+/** ZSTD_cycleLog() :
+ * condition for correct operation : hashLog > 1 */
+U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);
#endif /* ZSTD_COMPRESS_H */
diff --git a/zstd/lib/compress/zstd_compress_literals.c b/zstd/lib/compress/zstd_compress_literals.c
new file mode 100644
index 0000000..17e7168
--- /dev/null
+++ b/zstd/lib/compress/zstd_compress_literals.c
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+ /*-*************************************
+ * Dependencies
+ ***************************************/
+#include "zstd_compress_literals.h"
+
+size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+ BYTE* const ostart = (BYTE* const)dst;
+ U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
+
+ RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, "");
+
+ switch(flSize)
+ {
+ case 1: /* 2 - 1 - 5 */
+ ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3));
+ break;
+ case 2: /* 2 - 2 - 12 */
+ MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4)));
+ break;
+ case 3: /* 2 - 2 - 20 */
+ MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4)));
+ break;
+ default: /* not necessary : flSize is {1,2,3} */
+ assert(0);
+ }
+
+ memcpy(ostart + flSize, src, srcSize);
+ DEBUGLOG(5, "Raw literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
+ return srcSize + flSize;
+}
+
+size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+ BYTE* const ostart = (BYTE* const)dst;
+ U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
+
+ (void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */
+
+ switch(flSize)
+ {
+ case 1: /* 2 - 1 - 5 */
+ ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3));
+ break;
+ case 2: /* 2 - 2 - 12 */
+ MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4)));
+ break;
+ case 3: /* 2 - 2 - 20 */
+ MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4)));
+ break;
+ default: /* not necessary : flSize is {1,2,3} */
+ assert(0);
+ }
+
+ ostart[flSize] = *(const BYTE*)src;
+ DEBUGLOG(5, "RLE literals: %u -> %u", (U32)srcSize, (U32)flSize + 1);
+ return flSize+1;
+}
+
+size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
+ ZSTD_hufCTables_t* nextHuf,
+ ZSTD_strategy strategy, int disableLiteralCompression,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize,
+ void* entropyWorkspace, size_t entropyWorkspaceSize,
+ const int bmi2)
+{
+ size_t const minGain = ZSTD_minGain(srcSize, strategy);
+ size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
+ BYTE* const ostart = (BYTE*)dst;
+ U32 singleStream = srcSize < 256;
+ symbolEncodingType_e hType = set_compressed;
+ size_t cLitSize;
+
+ DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i srcSize=%u)",
+ disableLiteralCompression, (U32)srcSize);
+
+ /* Prepare nextEntropy assuming reusing the existing table */
+ memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+
+ if (disableLiteralCompression)
+ return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
+
+ /* small ? don't even attempt compression (speed opt) */
+# define COMPRESS_LITERALS_SIZE_MIN 63
+ { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
+ if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
+ }
+
+ RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression");
+ { HUF_repeat repeat = prevHuf->repeatMode;
+ int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
+ if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
+ cLitSize = singleStream ?
+ HUF_compress1X_repeat(
+ ostart+lhSize, dstCapacity-lhSize, src, srcSize,
+ HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
+ (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) :
+ HUF_compress4X_repeat(
+ ostart+lhSize, dstCapacity-lhSize, src, srcSize,
+ HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
+ (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
+ if (repeat != HUF_repeat_none) {
+ /* reused the existing table */
+ DEBUGLOG(5, "Reusing previous huffman table");
+ hType = set_repeat;
+ }
+ }
+
+ if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) {
+ memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+ return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
+ }
+ if (cLitSize==1) {
+ memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+ return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
+ }
+
+ if (hType == set_compressed) {
+ /* using a newly constructed table */
+ nextHuf->repeatMode = HUF_repeat_check;
+ }
+
+ /* Build header */
+ switch(lhSize)
+ {
+ case 3: /* 2 - 2 - 10 - 10 */
+ { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
+ MEM_writeLE24(ostart, lhc);
+ break;
+ }
+ case 4: /* 2 - 2 - 14 - 14 */
+ { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
+ MEM_writeLE32(ostart, lhc);
+ break;
+ }
+ case 5: /* 2 - 2 - 18 - 18 */
+ { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
+ MEM_writeLE32(ostart, lhc);
+ ostart[4] = (BYTE)(cLitSize >> 10);
+ break;
+ }
+ default: /* not possible : lhSize is {3,4,5} */
+ assert(0);
+ }
+ DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)srcSize, (U32)(lhSize+cLitSize));
+ return lhSize+cLitSize;
+}
diff --git a/zstd/lib/compress/zstd_compress_literals.h b/zstd/lib/compress/zstd_compress_literals.h
new file mode 100644
index 0000000..8b08705
--- /dev/null
+++ b/zstd/lib/compress/zstd_compress_literals.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_COMPRESS_LITERALS_H
+#define ZSTD_COMPRESS_LITERALS_H
+
+#include "zstd_compress_internal.h" /* ZSTD_hufCTables_t, ZSTD_minGain() */
+
+
+size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
+ ZSTD_hufCTables_t* nextHuf,
+ ZSTD_strategy strategy, int disableLiteralCompression,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize,
+ void* entropyWorkspace, size_t entropyWorkspaceSize,
+ const int bmi2);
+
+#endif /* ZSTD_COMPRESS_LITERALS_H */
diff --git a/zstd/lib/compress/zstd_compress_sequences.c b/zstd/lib/compress/zstd_compress_sequences.c
new file mode 100644
index 0000000..f9f8097
--- /dev/null
+++ b/zstd/lib/compress/zstd_compress_sequences.c
@@ -0,0 +1,419 @@
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+ /*-*************************************
+ * Dependencies
+ ***************************************/
+#include "zstd_compress_sequences.h"
+
+/**
+ * -log2(x / 256) lookup table for x in [0, 256).
+ * If x == 0: Return 0
+ * Else: Return floor(-log2(x / 256) * 256)
+ */
+static unsigned const kInverseProbabilityLog256[256] = {
+ 0, 2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162,
+ 1130, 1100, 1073, 1047, 1024, 1001, 980, 960, 941, 923, 906, 889,
+ 874, 859, 844, 830, 817, 804, 791, 779, 768, 756, 745, 734,
+ 724, 714, 704, 694, 685, 676, 667, 658, 650, 642, 633, 626,
+ 618, 610, 603, 595, 588, 581, 574, 567, 561, 554, 548, 542,
+ 535, 529, 523, 517, 512, 506, 500, 495, 489, 484, 478, 473,
+ 468, 463, 458, 453, 448, 443, 438, 434, 429, 424, 420, 415,
+ 411, 407, 402, 398, 394, 390, 386, 382, 377, 373, 370, 366,
+ 362, 358, 354, 350, 347, 343, 339, 336, 332, 329, 325, 322,
+ 318, 315, 311, 308, 305, 302, 298, 295, 292, 289, 286, 282,
+ 279, 276, 273, 270, 267, 264, 261, 258, 256, 253, 250, 247,
+ 244, 241, 239, 236, 233, 230, 228, 225, 222, 220, 217, 215,
+ 212, 209, 207, 204, 202, 199, 197, 194, 192, 190, 187, 185,
+ 182, 180, 178, 175, 173, 171, 168, 166, 164, 162, 159, 157,
+ 155, 153, 151, 149, 146, 144, 142, 140, 138, 136, 134, 132,
+ 130, 128, 126, 123, 121, 119, 117, 115, 114, 112, 110, 108,
+ 106, 104, 102, 100, 98, 96, 94, 93, 91, 89, 87, 85,
+ 83, 82, 80, 78, 76, 74, 73, 71, 69, 67, 66, 64,
+ 62, 61, 59, 57, 55, 54, 52, 50, 49, 47, 46, 44,
+ 42, 41, 39, 37, 36, 34, 33, 31, 30, 28, 26, 25,
+ 23, 22, 20, 19, 17, 16, 14, 13, 11, 10, 8, 7,
+ 5, 4, 2, 1,
+};
+
+static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) {
+ void const* ptr = ctable;
+ U16 const* u16ptr = (U16 const*)ptr;
+ U32 const maxSymbolValue = MEM_read16(u16ptr + 1);
+ return maxSymbolValue;
+}
+
+/**
+ * Returns the cost in bytes of encoding the normalized count header.
+ * Returns an error if any of the helper functions return an error.
+ */
+static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max,
+ size_t const nbSeq, unsigned const FSELog)
+{
+ BYTE wksp[FSE_NCOUNTBOUND];
+ S16 norm[MaxSeq + 1];
+ const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
+ FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max), "");
+ return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog);
+}
+
+/**
+ * Returns the cost in bits of encoding the distribution described by count
+ * using the entropy bound.
+ */
+static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total)
+{
+ unsigned cost = 0;
+ unsigned s;
+ for (s = 0; s <= max; ++s) {
+ unsigned norm = (unsigned)((256 * count[s]) / total);
+ if (count[s] != 0 && norm == 0)
+ norm = 1;
+ assert(count[s] < total);
+ cost += count[s] * kInverseProbabilityLog256[norm];
+ }
+ return cost >> 8;
+}
+
+/**
+ * Returns the cost in bits of encoding the distribution in count using ctable.
+ * Returns an error if ctable cannot represent all the symbols in count.
+ */
+size_t ZSTD_fseBitCost(
+ FSE_CTable const* ctable,
+ unsigned const* count,
+ unsigned const max)
+{
+ unsigned const kAccuracyLog = 8;
+ size_t cost = 0;
+ unsigned s;
+ FSE_CState_t cstate;
+ FSE_initCState(&cstate, ctable);
+ if (ZSTD_getFSEMaxSymbolValue(ctable) < max) {
+ DEBUGLOG(5, "Repeat FSE_CTable has maxSymbolValue %u < %u",
+ ZSTD_getFSEMaxSymbolValue(ctable), max);
+ return ERROR(GENERIC);
+ }
+ for (s = 0; s <= max; ++s) {
+ unsigned const tableLog = cstate.stateLog;
+ unsigned const badCost = (tableLog + 1) << kAccuracyLog;
+ unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog);
+ if (count[s] == 0)
+ continue;
+ if (bitCost >= badCost) {
+ DEBUGLOG(5, "Repeat FSE_CTable has Prob[%u] == 0", s);
+ return ERROR(GENERIC);
+ }
+ cost += (size_t)count[s] * bitCost;
+ }
+ return cost >> kAccuracyLog;
+}
+
+/**
+ * Returns the cost in bits of encoding the distribution in count using the
+ * table described by norm. The max symbol support by norm is assumed >= max.
+ * norm must be valid for every symbol with non-zero probability in count.
+ */
+size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
+ unsigned const* count, unsigned const max)
+{
+ unsigned const shift = 8 - accuracyLog;
+ size_t cost = 0;
+ unsigned s;
+ assert(accuracyLog <= 8);
+ for (s = 0; s <= max; ++s) {
+ unsigned const normAcc = (norm[s] != -1) ? (unsigned)norm[s] : 1;
+ unsigned const norm256 = normAcc << shift;
+ assert(norm256 > 0);
+ assert(norm256 < 256);
+ cost += count[s] * kInverseProbabilityLog256[norm256];
+ }
+ return cost >> 8;
+}
+
+symbolEncodingType_e
+ZSTD_selectEncodingType(
+ FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
+ size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
+ FSE_CTable const* prevCTable,
+ short const* defaultNorm, U32 defaultNormLog,
+ ZSTD_defaultPolicy_e const isDefaultAllowed,
+ ZSTD_strategy const strategy)
+{
+ ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0);
+ if (mostFrequent == nbSeq) {
+ *repeatMode = FSE_repeat_none;
+ if (isDefaultAllowed && nbSeq <= 2) {
+ /* Prefer set_basic over set_rle when there are 2 or less symbols,
+ * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
+ * If basic encoding isn't possible, always choose RLE.
+ */
+ DEBUGLOG(5, "Selected set_basic");
+ return set_basic;
+ }
+ DEBUGLOG(5, "Selected set_rle");
+ return set_rle;
+ }
+ if (strategy < ZSTD_lazy) {
+ if (isDefaultAllowed) {
+ size_t const staticFse_nbSeq_max = 1000;
+ size_t const mult = 10 - strategy;
+ size_t const baseLog = 3;
+ size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog; /* 28-36 for offset, 56-72 for lengths */
+ assert(defaultNormLog >= 5 && defaultNormLog <= 6); /* xx_DEFAULTNORMLOG */
+ assert(mult <= 9 && mult >= 7);
+ if ( (*repeatMode == FSE_repeat_valid)
+ && (nbSeq < staticFse_nbSeq_max) ) {
+ DEBUGLOG(5, "Selected set_repeat");
+ return set_repeat;
+ }
+ if ( (nbSeq < dynamicFse_nbSeq_min)
+ || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) {
+ DEBUGLOG(5, "Selected set_basic");
+ /* The format allows default tables to be repeated, but it isn't useful.
+ * When using simple heuristics to select encoding type, we don't want
+ * to confuse these tables with dictionaries. When running more careful
+ * analysis, we don't need to waste time checking both repeating tables
+ * and default tables.
+ */
+ *repeatMode = FSE_repeat_none;
+ return set_basic;
+ }
+ }
+ } else {
+ size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC);
+ size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC);
+ size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog);
+ size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq);
+
+ if (isDefaultAllowed) {
+ assert(!ZSTD_isError(basicCost));
+ assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost)));
+ }
+ assert(!ZSTD_isError(NCountCost));
+ assert(compressedCost < ERROR(maxCode));
+ DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u",
+ (unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost);
+ if (basicCost <= repeatCost && basicCost <= compressedCost) {
+ DEBUGLOG(5, "Selected set_basic");
+ assert(isDefaultAllowed);
+ *repeatMode = FSE_repeat_none;
+ return set_basic;
+ }
+ if (repeatCost <= compressedCost) {
+ DEBUGLOG(5, "Selected set_repeat");
+ assert(!ZSTD_isError(repeatCost));
+ return set_repeat;
+ }
+ assert(compressedCost < basicCost && compressedCost < repeatCost);
+ }
+ DEBUGLOG(5, "Selected set_compressed");
+ *repeatMode = FSE_repeat_check;
+ return set_compressed;
+}
+
+size_t
+ZSTD_buildCTable(void* dst, size_t dstCapacity,
+ FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
+ unsigned* count, U32 max,
+ const BYTE* codeTable, size_t nbSeq,
+ const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
+ const FSE_CTable* prevCTable, size_t prevCTableSize,
+ void* entropyWorkspace, size_t entropyWorkspaceSize)
+{
+ BYTE* op = (BYTE*)dst;
+ const BYTE* const oend = op + dstCapacity;
+ DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity);
+
+ switch (type) {
+ case set_rle:
+ FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max), "");
+ RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall, "not enough space");
+ *op = codeTable[0];
+ return 1;
+ case set_repeat:
+ memcpy(nextCTable, prevCTable, prevCTableSize);
+ return 0;
+ case set_basic:
+ FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize), ""); /* note : could be pre-calculated */
+ return 0;
+ case set_compressed: {
+ S16 norm[MaxSeq + 1];
+ size_t nbSeq_1 = nbSeq;
+ const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
+ if (count[codeTable[nbSeq-1]] > 1) {
+ count[codeTable[nbSeq-1]]--;
+ nbSeq_1--;
+ }
+ assert(nbSeq_1 > 1);
+ FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max), "");
+ { size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */
+ FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed");
+ FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, entropyWorkspace, entropyWorkspaceSize), "");
+ return NCountSize;
+ }
+ }
+ default: assert(0); RETURN_ERROR(GENERIC, "impossible to reach");
+ }
+}
+
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_encodeSequences_body(
+ void* dst, size_t dstCapacity,
+ FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+ FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+ FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+ seqDef const* sequences, size_t nbSeq, int longOffsets)
+{
+ BIT_CStream_t blockStream;
+ FSE_CState_t stateMatchLength;
+ FSE_CState_t stateOffsetBits;
+ FSE_CState_t stateLitLength;
+
+ RETURN_ERROR_IF(
+ ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)),
+ dstSize_tooSmall, "not enough space remaining");
+ DEBUGLOG(6, "available space for bitstream : %i (dstCapacity=%u)",
+ (int)(blockStream.endPtr - blockStream.startPtr),
+ (unsigned)dstCapacity);
+
+ /* first symbols */
+ FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
+ FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]);
+ FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]);
+ BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
+ if (MEM_32bits()) BIT_flushBits(&blockStream);
+ BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
+ if (MEM_32bits()) BIT_flushBits(&blockStream);
+ if (longOffsets) {
+ U32 const ofBits = ofCodeTable[nbSeq-1];
+ unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
+ if (extraBits) {
+ BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
+ BIT_flushBits(&blockStream);
+ }
+ BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
+ ofBits - extraBits);
+ } else {
+ BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
+ }
+ BIT_flushBits(&blockStream);
+
+ { size_t n;
+ for (n=nbSeq-2 ; n<nbSeq ; n--) { /* intentional underflow */
+ BYTE const llCode = llCodeTable[n];
+ BYTE const ofCode = ofCodeTable[n];
+ BYTE const mlCode = mlCodeTable[n];
+ U32 const llBits = LL_bits[llCode];
+ U32 const ofBits = ofCode;
+ U32 const mlBits = ML_bits[mlCode];
+ DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
+ (unsigned)sequences[n].litLength,
+ (unsigned)sequences[n].matchLength + MINMATCH,
+ (unsigned)sequences[n].offset);
+ /* 32b*/ /* 64b*/
+ /* (7)*/ /* (7)*/
+ FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */
+ FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */
+ if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
+ FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */
+ if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
+ BIT_flushBits(&blockStream); /* (7)*/
+ BIT_addBits(&blockStream, sequences[n].litLength, llBits);
+ if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
+ BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
+ if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
+ if (longOffsets) {
+ unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
+ if (extraBits) {
+ BIT_addBits(&blockStream, sequences[n].offset, extraBits);
+ BIT_flushBits(&blockStream); /* (7)*/
+ }
+ BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
+ ofBits - extraBits); /* 31 */
+ } else {
+ BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */
+ }
+ BIT_flushBits(&blockStream); /* (7)*/
+ DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr));
+ } }
+
+ DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog);
+ FSE_flushCState(&blockStream, &stateMatchLength);
+ DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog);
+ FSE_flushCState(&blockStream, &stateOffsetBits);
+ DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog);
+ FSE_flushCState(&blockStream, &stateLitLength);
+
+ { size_t const streamSize = BIT_closeCStream(&blockStream);
+ RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space");
+ return streamSize;
+ }
+}
+
+static size_t
+ZSTD_encodeSequences_default(
+ void* dst, size_t dstCapacity,
+ FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+ FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+ FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+ seqDef const* sequences, size_t nbSeq, int longOffsets)
+{
+ return ZSTD_encodeSequences_body(dst, dstCapacity,
+ CTable_MatchLength, mlCodeTable,
+ CTable_OffsetBits, ofCodeTable,
+ CTable_LitLength, llCodeTable,
+ sequences, nbSeq, longOffsets);
+}
+
+
+#if DYNAMIC_BMI2
+
+static TARGET_ATTRIBUTE("bmi2") size_t
+ZSTD_encodeSequences_bmi2(
+ void* dst, size_t dstCapacity,
+ FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+ FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+ FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+ seqDef const* sequences, size_t nbSeq, int longOffsets)
+{
+ return ZSTD_encodeSequences_body(dst, dstCapacity,
+ CTable_MatchLength, mlCodeTable,
+ CTable_OffsetBits, ofCodeTable,
+ CTable_LitLength, llCodeTable,
+ sequences, nbSeq, longOffsets);
+}
+
+#endif
+
+size_t ZSTD_encodeSequences(
+ void* dst, size_t dstCapacity,
+ FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+ FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+ FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+ seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2)
+{
+ DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity);
+#if DYNAMIC_BMI2
+ if (bmi2) {
+ return ZSTD_encodeSequences_bmi2(dst, dstCapacity,
+ CTable_MatchLength, mlCodeTable,
+ CTable_OffsetBits, ofCodeTable,
+ CTable_LitLength, llCodeTable,
+ sequences, nbSeq, longOffsets);
+ }
+#endif
+ (void)bmi2;
+ return ZSTD_encodeSequences_default(dst, dstCapacity,
+ CTable_MatchLength, mlCodeTable,
+ CTable_OffsetBits, ofCodeTable,
+ CTable_LitLength, llCodeTable,
+ sequences, nbSeq, longOffsets);
+}
diff --git a/zstd/lib/compress/zstd_compress_sequences.h b/zstd/lib/compress/zstd_compress_sequences.h
new file mode 100644
index 0000000..68c6f9a
--- /dev/null
+++ b/zstd/lib/compress/zstd_compress_sequences.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_COMPRESS_SEQUENCES_H
+#define ZSTD_COMPRESS_SEQUENCES_H
+
+#include "../common/fse.h" /* FSE_repeat, FSE_CTable */
+#include "../common/zstd_internal.h" /* symbolEncodingType_e, ZSTD_strategy */
+
+typedef enum {
+ ZSTD_defaultDisallowed = 0,
+ ZSTD_defaultAllowed = 1
+} ZSTD_defaultPolicy_e;
+
+symbolEncodingType_e
+ZSTD_selectEncodingType(
+ FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
+ size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
+ FSE_CTable const* prevCTable,
+ short const* defaultNorm, U32 defaultNormLog,
+ ZSTD_defaultPolicy_e const isDefaultAllowed,
+ ZSTD_strategy const strategy);
+
+size_t
+ZSTD_buildCTable(void* dst, size_t dstCapacity,
+ FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
+ unsigned* count, U32 max,
+ const BYTE* codeTable, size_t nbSeq,
+ const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
+ const FSE_CTable* prevCTable, size_t prevCTableSize,
+ void* entropyWorkspace, size_t entropyWorkspaceSize);
+
+size_t ZSTD_encodeSequences(
+ void* dst, size_t dstCapacity,
+ FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+ FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+ FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+ seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2);
+
+size_t ZSTD_fseBitCost(
+ FSE_CTable const* ctable,
+ unsigned const* count,
+ unsigned const max);
+
+size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
+ unsigned const* count, unsigned const max);
+#endif /* ZSTD_COMPRESS_SEQUENCES_H */
diff --git a/zstd/lib/compress/zstd_compress_superblock.c b/zstd/lib/compress/zstd_compress_superblock.c
new file mode 100644
index 0000000..b693866
--- /dev/null
+++ b/zstd/lib/compress/zstd_compress_superblock.c
@@ -0,0 +1,845 @@
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+ /*-*************************************
+ * Dependencies
+ ***************************************/
+#include "zstd_compress_superblock.h"
+
+#include "../common/zstd_internal.h" /* ZSTD_getSequenceLength */
+#include "hist.h" /* HIST_countFast_wksp */
+#include "zstd_compress_internal.h"
+#include "zstd_compress_sequences.h"
+#include "zstd_compress_literals.h"
+
+/*-*************************************
+* Superblock entropy buffer structs
+***************************************/
+/** ZSTD_hufCTablesMetadata_t :
+ * Stores Literals Block Type for a super-block in hType, and
+ * huffman tree description in hufDesBuffer.
+ * hufDesSize refers to the size of huffman tree description in bytes.
+ * This metadata is populated in ZSTD_buildSuperBlockEntropy_literal() */
+typedef struct {
+ symbolEncodingType_e hType;
+ BYTE hufDesBuffer[500]; /* TODO give name to this value */
+ size_t hufDesSize;
+} ZSTD_hufCTablesMetadata_t;
+
+/** ZSTD_fseCTablesMetadata_t :
+ * Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
+ * fse tables in fseTablesBuffer.
+ * fseTablesSize refers to the size of fse tables in bytes.
+ * This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */
+typedef struct {
+ symbolEncodingType_e llType;
+ symbolEncodingType_e ofType;
+ symbolEncodingType_e mlType;
+ BYTE fseTablesBuffer[500]; /* TODO give name to this value */
+ size_t fseTablesSize;
+ size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */
+} ZSTD_fseCTablesMetadata_t;
+
+typedef struct {
+ ZSTD_hufCTablesMetadata_t hufMetadata;
+ ZSTD_fseCTablesMetadata_t fseMetadata;
+} ZSTD_entropyCTablesMetadata_t;
+
+
+/** ZSTD_buildSuperBlockEntropy_literal() :
+ * Builds entropy for the super-block literals.
+ * Stores literals block type (raw, rle, compressed, repeat) and
+ * huffman description table to hufMetadata.
+ * @return : size of huffman description table or error code */
+static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSize,
+ const ZSTD_hufCTables_t* prevHuf,
+ ZSTD_hufCTables_t* nextHuf,
+ ZSTD_hufCTablesMetadata_t* hufMetadata,
+ const int disableLiteralsCompression,
+ void* workspace, size_t wkspSize)
+{
+ BYTE* const wkspStart = (BYTE*)workspace;
+ BYTE* const wkspEnd = wkspStart + wkspSize;
+ BYTE* const countWkspStart = wkspStart;
+ unsigned* const countWksp = (unsigned*)workspace;
+ const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);
+ BYTE* const nodeWksp = countWkspStart + countWkspSize;
+ const size_t nodeWkspSize = wkspEnd-nodeWksp;
+ unsigned maxSymbolValue = 255;
+ unsigned huffLog = HUF_TABLELOG_DEFAULT;
+ HUF_repeat repeat = prevHuf->repeatMode;
+
+ DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize);
+
+ /* Prepare nextEntropy assuming reusing the existing table */
+ memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+
+ if (disableLiteralsCompression) {
+ DEBUGLOG(5, "set_basic - disabled");
+ hufMetadata->hType = set_basic;
+ return 0;
+ }
+
+ /* small ? don't even attempt compression (speed opt) */
+# define COMPRESS_LITERALS_SIZE_MIN 63
+ { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
+ if (srcSize <= minLitSize) {
+ DEBUGLOG(5, "set_basic - too small");
+ hufMetadata->hType = set_basic;
+ return 0;
+ }
+ }
+
+ /* Scan input and build symbol stats */
+ { size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize);
+ FORWARD_IF_ERROR(largest, "HIST_count_wksp failed");
+ if (largest == srcSize) {
+ DEBUGLOG(5, "set_rle");
+ hufMetadata->hType = set_rle;
+ return 0;
+ }
+ if (largest <= (srcSize >> 7)+4) {
+ DEBUGLOG(5, "set_basic - no gain");
+ hufMetadata->hType = set_basic;
+ return 0;
+ }
+ }
+
+ /* Validate the previous Huffman table */
+ if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {
+ repeat = HUF_repeat_none;
+ }
+
+ /* Build Huffman Tree */
+ memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));
+ huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
+ { size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp,
+ maxSymbolValue, huffLog,
+ nodeWksp, nodeWkspSize);
+ FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp");
+ huffLog = (U32)maxBits;
+ { /* Build and write the CTable */
+ size_t const newCSize = HUF_estimateCompressedSize(
+ (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
+ size_t const hSize = HUF_writeCTable(
+ hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
+ (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog);
+ /* Check against repeating the previous CTable */
+ if (repeat != HUF_repeat_none) {
+ size_t const oldCSize = HUF_estimateCompressedSize(
+ (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);
+ if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {
+ DEBUGLOG(5, "set_repeat - smaller");
+ memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+ hufMetadata->hType = set_repeat;
+ return 0;
+ }
+ }
+ if (newCSize + hSize >= srcSize) {
+ DEBUGLOG(5, "set_basic - no gains");
+ memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+ hufMetadata->hType = set_basic;
+ return 0;
+ }
+ DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);
+ hufMetadata->hType = set_compressed;
+ nextHuf->repeatMode = HUF_repeat_check;
+ return hSize;
+ }
+ }
+}
+
+/** ZSTD_buildSuperBlockEntropy_sequences() :
+ * Builds entropy for the super-block sequences.
+ * Stores symbol compression modes and fse table to fseMetadata.
+ * @return : size of fse tables or error code */
+static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr,
+ const ZSTD_fseCTables_t* prevEntropy,
+ ZSTD_fseCTables_t* nextEntropy,
+ const ZSTD_CCtx_params* cctxParams,
+ ZSTD_fseCTablesMetadata_t* fseMetadata,
+ void* workspace, size_t wkspSize)
+{
+ BYTE* const wkspStart = (BYTE*)workspace;
+ BYTE* const wkspEnd = wkspStart + wkspSize;
+ BYTE* const countWkspStart = wkspStart;
+ unsigned* const countWksp = (unsigned*)workspace;
+ const size_t countWkspSize = (MaxSeq + 1) * sizeof(unsigned);
+ BYTE* const cTableWksp = countWkspStart + countWkspSize;
+ const size_t cTableWkspSize = wkspEnd-cTableWksp;
+ ZSTD_strategy const strategy = cctxParams->cParams.strategy;
+ FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
+ FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
+ FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;
+ const BYTE* const ofCodeTable = seqStorePtr->ofCode;
+ const BYTE* const llCodeTable = seqStorePtr->llCode;
+ const BYTE* const mlCodeTable = seqStorePtr->mlCode;
+ size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
+ BYTE* const ostart = fseMetadata->fseTablesBuffer;
+ BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
+ BYTE* op = ostart;
+
+ assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE));
+ DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq);
+ memset(workspace, 0, wkspSize);
+
+ fseMetadata->lastCountSize = 0;
+ /* convert length/distances into codes */
+ ZSTD_seqToCodes(seqStorePtr);
+ /* build CTable for Literal Lengths */
+ { U32 LLtype;
+ unsigned max = MaxLL;
+ size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, llCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
+ DEBUGLOG(5, "Building LL table");
+ nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
+ LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode,
+ countWksp, max, mostFrequent, nbSeq,
+ LLFSELog, prevEntropy->litlengthCTable,
+ LL_defaultNorm, LL_defaultNormLog,
+ ZSTD_defaultAllowed, strategy);
+ assert(set_basic < set_compressed && set_rle < set_compressed);
+ assert(!(LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
+ { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
+ countWksp, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
+ prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable),
+ cTableWksp, cTableWkspSize);
+ FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed");
+ if (LLtype == set_compressed)
+ fseMetadata->lastCountSize = countSize;
+ op += countSize;
+ fseMetadata->llType = (symbolEncodingType_e) LLtype;
+ } }
+ /* build CTable for Offsets */
+ { U32 Offtype;
+ unsigned max = MaxOff;
+ size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, ofCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
+ /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
+ ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
+ DEBUGLOG(5, "Building OF table");
+ nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
+ Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode,
+ countWksp, max, mostFrequent, nbSeq,
+ OffFSELog, prevEntropy->offcodeCTable,
+ OF_defaultNorm, OF_defaultNormLog,
+ defaultPolicy, strategy);
+ assert(!(Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
+ { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
+ countWksp, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
+ prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable),
+ cTableWksp, cTableWkspSize);
+ FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed");
+ if (Offtype == set_compressed)
+ fseMetadata->lastCountSize = countSize;
+ op += countSize;
+ fseMetadata->ofType = (symbolEncodingType_e) Offtype;
+ } }
+ /* build CTable for MatchLengths */
+ { U32 MLtype;
+ unsigned max = MaxML;
+ size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, mlCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
+ DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
+ nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
+ MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode,
+ countWksp, max, mostFrequent, nbSeq,
+ MLFSELog, prevEntropy->matchlengthCTable,
+ ML_defaultNorm, ML_defaultNormLog,
+ ZSTD_defaultAllowed, strategy);
+ assert(!(MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
+ { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
+ countWksp, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
+ prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable),
+ cTableWksp, cTableWkspSize);
+ FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed");
+ if (MLtype == set_compressed)
+ fseMetadata->lastCountSize = countSize;
+ op += countSize;
+ fseMetadata->mlType = (symbolEncodingType_e) MLtype;
+ } }
+ assert((size_t) (op-ostart) <= sizeof(fseMetadata->fseTablesBuffer));
+ return op-ostart;
+}
+
+
+/** ZSTD_buildSuperBlockEntropy() :
+ * Builds entropy for the super-block.
+ * @return : 0 on success or error code */
+static size_t
+ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr,
+ const ZSTD_entropyCTables_t* prevEntropy,
+ ZSTD_entropyCTables_t* nextEntropy,
+ const ZSTD_CCtx_params* cctxParams,
+ ZSTD_entropyCTablesMetadata_t* entropyMetadata,
+ void* workspace, size_t wkspSize)
+{
+ size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart;
+ DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy");
+ entropyMetadata->hufMetadata.hufDesSize =
+ ZSTD_buildSuperBlockEntropy_literal(seqStorePtr->litStart, litSize,
+ &prevEntropy->huf, &nextEntropy->huf,
+ &entropyMetadata->hufMetadata,
+ ZSTD_disableLiteralsCompression(cctxParams),
+ workspace, wkspSize);
+ FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildSuperBlockEntropy_literal failed");
+ entropyMetadata->fseMetadata.fseTablesSize =
+ ZSTD_buildSuperBlockEntropy_sequences(seqStorePtr,
+ &prevEntropy->fse, &nextEntropy->fse,
+ cctxParams,
+ &entropyMetadata->fseMetadata,
+ workspace, wkspSize);
+ FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildSuperBlockEntropy_sequences failed");
+ return 0;
+}
+
+/** ZSTD_compressSubBlock_literal() :
+ * Compresses literals section for a sub-block.
+ * When we have to write the Huffman table we will sometimes choose a header
+ * size larger than necessary. This is because we have to pick the header size
+ * before we know the table size + compressed size, so we have a bound on the
+ * table size. If we guessed incorrectly, we fall back to uncompressed literals.
+ *
+ * We write the header when writeEntropy=1 and set entropyWrriten=1 when we succeeded
+ * in writing the header, otherwise it is set to 0.
+ *
+ * hufMetadata->hType has literals block type info.
+ * If it is set_basic, all sub-blocks literals section will be Raw_Literals_Block.
+ * If it is set_rle, all sub-blocks literals section will be RLE_Literals_Block.
+ * If it is set_compressed, first sub-block's literals section will be Compressed_Literals_Block
+ * If it is set_compressed, first sub-block's literals section will be Treeless_Literals_Block
+ * and the following sub-blocks' literals sections will be Treeless_Literals_Block.
+ * @return : compressed size of literals section of a sub-block
+ * Or 0 if it unable to compress.
+ * Or error code */
+static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
+ const ZSTD_hufCTablesMetadata_t* hufMetadata,
+ const BYTE* literals, size_t litSize,
+ void* dst, size_t dstSize,
+ const int bmi2, int writeEntropy, int* entropyWritten)
+{
+ size_t const header = writeEntropy ? 200 : 0;
+ size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header));
+ BYTE* const ostart = (BYTE*)dst;
+ BYTE* const oend = ostart + dstSize;
+ BYTE* op = ostart + lhSize;
+ U32 const singleStream = lhSize == 3;
+ symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat;
+ size_t cLitSize = 0;
+
+ (void)bmi2; /* TODO bmi2... */
+
+ DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy);
+
+ *entropyWritten = 0;
+ if (litSize == 0 || hufMetadata->hType == set_basic) {
+ DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal");
+ return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize);
+ } else if (hufMetadata->hType == set_rle) {
+ DEBUGLOG(5, "ZSTD_compressSubBlock_literal using rle literal");
+ return ZSTD_compressRleLiteralsBlock(dst, dstSize, literals, litSize);
+ }
+
+ assert(litSize > 0);
+ assert(hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat);
+
+ if (writeEntropy && hufMetadata->hType == set_compressed) {
+ memcpy(op, hufMetadata->hufDesBuffer, hufMetadata->hufDesSize);
+ op += hufMetadata->hufDesSize;
+ cLitSize += hufMetadata->hufDesSize;
+ DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize);
+ }
+
+ /* TODO bmi2 */
+ { const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable)
+ : HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable);
+ op += cSize;
+ cLitSize += cSize;
+ if (cSize == 0 || ERR_isError(cSize)) {
+ DEBUGLOG(5, "Failed to write entropy tables %s", ZSTD_getErrorName(cSize));
+ return 0;
+ }
+ /* If we expand and we aren't writing a header then emit uncompressed */
+ if (!writeEntropy && cLitSize >= litSize) {
+ DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal because uncompressible");
+ return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize);
+ }
+ /* If we are writing headers then allow expansion that doesn't change our header size. */
+ if (lhSize < (size_t)(3 + (cLitSize >= 1 KB) + (cLitSize >= 16 KB))) {
+ assert(cLitSize > litSize);
+ DEBUGLOG(5, "Literals expanded beyond allowed header size");
+ return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize);
+ }
+ DEBUGLOG(5, "ZSTD_compressSubBlock_literal (cSize=%zu)", cSize);
+ }
+
+ /* Build header */
+ switch(lhSize)
+ {
+ case 3: /* 2 - 2 - 10 - 10 */
+ { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14);
+ MEM_writeLE24(ostart, lhc);
+ break;
+ }
+ case 4: /* 2 - 2 - 14 - 14 */
+ { U32 const lhc = hType + (2 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<18);
+ MEM_writeLE32(ostart, lhc);
+ break;
+ }
+ case 5: /* 2 - 2 - 18 - 18 */
+ { U32 const lhc = hType + (3 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<22);
+ MEM_writeLE32(ostart, lhc);
+ ostart[4] = (BYTE)(cLitSize >> 10);
+ break;
+ }
+ default: /* not possible : lhSize is {3,4,5} */
+ assert(0);
+ }
+ *entropyWritten = 1;
+ DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart));
+ return op-ostart;
+}
+
+static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef* sequences, size_t nbSeq, size_t litSize, int lastSequence) {
+ const seqDef* const sstart = sequences;
+ const seqDef* const send = sequences + nbSeq;
+ const seqDef* sp = sstart;
+ size_t matchLengthSum = 0;
+ size_t litLengthSum = 0;
+ while (send-sp > 0) {
+ ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp);
+ litLengthSum += seqLen.litLength;
+ matchLengthSum += seqLen.matchLength;
+ sp++;
+ }
+ assert(litLengthSum <= litSize);
+ if (!lastSequence) {
+ assert(litLengthSum == litSize);
+ }
+ return matchLengthSum + litSize;
+}
+
+/** ZSTD_compressSubBlock_sequences() :
+ * Compresses sequences section for a sub-block.
+ * fseMetadata->llType, fseMetadata->ofType, and fseMetadata->mlType have
+ * symbol compression modes for the super-block.
+ * The first successfully compressed block will have these in its header.
+ * We set entropyWritten=1 when we succeed in compressing the sequences.
+ * The following sub-blocks will always have repeat mode.
+ * @return : compressed size of sequences section of a sub-block
+ * Or 0 if it is unable to compress
+ * Or error code. */
+static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
+ const ZSTD_fseCTablesMetadata_t* fseMetadata,
+ const seqDef* sequences, size_t nbSeq,
+ const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
+ const ZSTD_CCtx_params* cctxParams,
+ void* dst, size_t dstCapacity,
+ const int bmi2, int writeEntropy, int* entropyWritten)
+{
+ const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
+ BYTE* const ostart = (BYTE*)dst;
+ BYTE* const oend = ostart + dstCapacity;
+ BYTE* op = ostart;
+ BYTE* seqHead;
+
+ DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (nbSeq=%zu, writeEntropy=%d, longOffsets=%d)", nbSeq, writeEntropy, longOffsets);
+
+ *entropyWritten = 0;
+ /* Sequences Header */
+ RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
+ dstSize_tooSmall, "");
+ if (nbSeq < 0x7F)
+ *op++ = (BYTE)nbSeq;
+ else if (nbSeq < LONGNBSEQ)
+ op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
+ else
+ op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
+ if (nbSeq==0) {
+ return op - ostart;
+ }
+
+ /* seqHead : flags for FSE encoding type */
+ seqHead = op++;
+
+ DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (seqHeadSize=%u)", (unsigned)(op-ostart));
+
+ if (writeEntropy) {
+ const U32 LLtype = fseMetadata->llType;
+ const U32 Offtype = fseMetadata->ofType;
+ const U32 MLtype = fseMetadata->mlType;
+ DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (fseTablesSize=%zu)", fseMetadata->fseTablesSize);
+ *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
+ memcpy(op, fseMetadata->fseTablesBuffer, fseMetadata->fseTablesSize);
+ op += fseMetadata->fseTablesSize;
+ } else {
+ const U32 repeat = set_repeat;
+ *seqHead = (BYTE)((repeat<<6) + (repeat<<4) + (repeat<<2));
+ }
+
+ { size_t const bitstreamSize = ZSTD_encodeSequences(
+ op, oend - op,
+ fseTables->matchlengthCTable, mlCode,
+ fseTables->offcodeCTable, ofCode,
+ fseTables->litlengthCTable, llCode,
+ sequences, nbSeq,
+ longOffsets, bmi2);
+ FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed");
+ op += bitstreamSize;
+ /* zstd versions <= 1.3.4 mistakenly report corruption when
+ * FSE_readNCount() receives a buffer < 4 bytes.
+ * Fixed by https://github.com/facebook/zstd/pull/1146.
+ * This can happen when the last set_compressed table present is 2
+ * bytes and the bitstream is only one byte.
+ * In this exceedingly rare case, we will simply emit an uncompressed
+ * block, since it isn't worth optimizing.
+ */
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ if (writeEntropy && fseMetadata->lastCountSize && fseMetadata->lastCountSize + bitstreamSize < 4) {
+ /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */
+ assert(fseMetadata->lastCountSize + bitstreamSize == 3);
+ DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by "
+ "emitting an uncompressed block.");
+ return 0;
+ }
+#endif
+ DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (bitstreamSize=%zu)", bitstreamSize);
+ }
+
+ /* zstd versions <= 1.4.0 mistakenly report error when
+ * sequences section body size is less than 3 bytes.
+ * Fixed by https://github.com/facebook/zstd/pull/1664.
+ * This can happen when the previous sequences section block is compressed
+ * with rle mode and the current block's sequences section is compressed
+ * with repeat mode where sequences section body size can be 1 byte.
+ */
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ if (op-seqHead < 4) {
+ DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.4.0 by emitting "
+ "an uncompressed block when sequences are < 4 bytes");
+ return 0;
+ }
+#endif
+
+ *entropyWritten = 1;
+ return op - ostart;
+}
+
+/** ZSTD_compressSubBlock() :
+ * Compresses a single sub-block.
+ * @return : compressed size of the sub-block
+ * Or 0 if it failed to compress. */
+static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
+ const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
+ const seqDef* sequences, size_t nbSeq,
+ const BYTE* literals, size_t litSize,
+ const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
+ const ZSTD_CCtx_params* cctxParams,
+ void* dst, size_t dstCapacity,
+ const int bmi2,
+ int writeLitEntropy, int writeSeqEntropy,
+ int* litEntropyWritten, int* seqEntropyWritten,
+ U32 lastBlock)
+{
+ BYTE* const ostart = (BYTE*)dst;
+ BYTE* const oend = ostart + dstCapacity;
+ BYTE* op = ostart + ZSTD_blockHeaderSize;
+ DEBUGLOG(5, "ZSTD_compressSubBlock (litSize=%zu, nbSeq=%zu, writeLitEntropy=%d, writeSeqEntropy=%d, lastBlock=%d)",
+ litSize, nbSeq, writeLitEntropy, writeSeqEntropy, lastBlock);
+ { size_t cLitSize = ZSTD_compressSubBlock_literal((const HUF_CElt*)entropy->huf.CTable,
+ &entropyMetadata->hufMetadata, literals, litSize,
+ op, oend-op, bmi2, writeLitEntropy, litEntropyWritten);
+ FORWARD_IF_ERROR(cLitSize, "ZSTD_compressSubBlock_literal failed");
+ if (cLitSize == 0) return 0;
+ op += cLitSize;
+ }
+ { size_t cSeqSize = ZSTD_compressSubBlock_sequences(&entropy->fse,
+ &entropyMetadata->fseMetadata,
+ sequences, nbSeq,
+ llCode, mlCode, ofCode,
+ cctxParams,
+ op, oend-op,
+ bmi2, writeSeqEntropy, seqEntropyWritten);
+ FORWARD_IF_ERROR(cSeqSize, "ZSTD_compressSubBlock_sequences failed");
+ if (cSeqSize == 0) return 0;
+ op += cSeqSize;
+ }
+ /* Write block header */
+ { size_t cSize = (op-ostart)-ZSTD_blockHeaderSize;
+ U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
+ MEM_writeLE24(ostart, cBlockHeader24);
+ }
+ return op-ostart;
+}
+
+static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize,
+ const ZSTD_hufCTables_t* huf,
+ const ZSTD_hufCTablesMetadata_t* hufMetadata,
+ void* workspace, size_t wkspSize,
+ int writeEntropy)
+{
+ unsigned* const countWksp = (unsigned*)workspace;
+ unsigned maxSymbolValue = 255;
+ size_t literalSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
+
+ if (hufMetadata->hType == set_basic) return litSize;
+ else if (hufMetadata->hType == set_rle) return 1;
+ else if (hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat) {
+ size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize);
+ if (ZSTD_isError(largest)) return litSize;
+ { size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue);
+ if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize;
+ return cLitSizeEstimate + literalSectionHeaderSize;
+ } }
+ assert(0); /* impossible */
+ return 0;
+}
+
+static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type,
+ const BYTE* codeTable, unsigned maxCode,
+ size_t nbSeq, const FSE_CTable* fseCTable,
+ const U32* additionalBits,
+ short const* defaultNorm, U32 defaultNormLog,
+ void* workspace, size_t wkspSize)
+{
+ unsigned* const countWksp = (unsigned*)workspace;
+ const BYTE* ctp = codeTable;
+ const BYTE* const ctStart = ctp;
+ const BYTE* const ctEnd = ctStart + nbSeq;
+ size_t cSymbolTypeSizeEstimateInBits = 0;
+ unsigned max = maxCode;
+
+ HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize); /* can't fail */
+ if (type == set_basic) {
+ cSymbolTypeSizeEstimateInBits = ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max);
+ } else if (type == set_rle) {
+ cSymbolTypeSizeEstimateInBits = 0;
+ } else if (type == set_compressed || type == set_repeat) {
+ cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max);
+ }
+ if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) return nbSeq * 10;
+ while (ctp < ctEnd) {
+ if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp];
+ else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */
+ ctp++;
+ }
+ return cSymbolTypeSizeEstimateInBits / 8;
+}
+
+static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable,
+ const BYTE* llCodeTable,
+ const BYTE* mlCodeTable,
+ size_t nbSeq,
+ const ZSTD_fseCTables_t* fseTables,
+ const ZSTD_fseCTablesMetadata_t* fseMetadata,
+ void* workspace, size_t wkspSize,
+ int writeEntropy)
+{
+ size_t sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
+ size_t cSeqSizeEstimate = 0;
+ cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff,
+ nbSeq, fseTables->offcodeCTable, NULL,
+ OF_defaultNorm, OF_defaultNormLog,
+ workspace, wkspSize);
+ cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->llType, llCodeTable, MaxLL,
+ nbSeq, fseTables->litlengthCTable, LL_bits,
+ LL_defaultNorm, LL_defaultNormLog,
+ workspace, wkspSize);
+ cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, MaxML,
+ nbSeq, fseTables->matchlengthCTable, ML_bits,
+ ML_defaultNorm, ML_defaultNormLog,
+ workspace, wkspSize);
+ if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize;
+ return cSeqSizeEstimate + sequencesSectionHeaderSize;
+}
+
+static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
+ const BYTE* ofCodeTable,
+ const BYTE* llCodeTable,
+ const BYTE* mlCodeTable,
+ size_t nbSeq,
+ const ZSTD_entropyCTables_t* entropy,
+ const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
+ void* workspace, size_t wkspSize,
+ int writeLitEntropy, int writeSeqEntropy) {
+ size_t cSizeEstimate = 0;
+ cSizeEstimate += ZSTD_estimateSubBlockSize_literal(literals, litSize,
+ &entropy->huf, &entropyMetadata->hufMetadata,
+ workspace, wkspSize, writeLitEntropy);
+ cSizeEstimate += ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
+ nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
+ workspace, wkspSize, writeSeqEntropy);
+ return cSizeEstimate + ZSTD_blockHeaderSize;
+}
+
+static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata)
+{
+ if (fseMetadata->llType == set_compressed || fseMetadata->llType == set_rle)
+ return 1;
+ if (fseMetadata->mlType == set_compressed || fseMetadata->mlType == set_rle)
+ return 1;
+ if (fseMetadata->ofType == set_compressed || fseMetadata->ofType == set_rle)
+ return 1;
+ return 0;
+}
+
+/** ZSTD_compressSubBlock_multi() :
+ * Breaks super-block into multiple sub-blocks and compresses them.
+ * Entropy will be written to the first block.
+ * The following blocks will use repeat mode to compress.
+ * All sub-blocks are compressed blocks (no raw or rle blocks).
+ * @return : compressed size of the super block (which is multiple ZSTD blocks)
+ * Or 0 if it failed to compress. */
+static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
+ const ZSTD_compressedBlockState_t* prevCBlock,
+ ZSTD_compressedBlockState_t* nextCBlock,
+ const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
+ const ZSTD_CCtx_params* cctxParams,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize,
+ const int bmi2, U32 lastBlock,
+ void* workspace, size_t wkspSize)
+{
+ const seqDef* const sstart = seqStorePtr->sequencesStart;
+ const seqDef* const send = seqStorePtr->sequences;
+ const seqDef* sp = sstart;
+ const BYTE* const lstart = seqStorePtr->litStart;
+ const BYTE* const lend = seqStorePtr->lit;
+ const BYTE* lp = lstart;
+ BYTE const* ip = (BYTE const*)src;
+ BYTE const* const iend = ip + srcSize;
+ BYTE* const ostart = (BYTE*)dst;
+ BYTE* const oend = ostart + dstCapacity;
+ BYTE* op = ostart;
+ const BYTE* llCodePtr = seqStorePtr->llCode;
+ const BYTE* mlCodePtr = seqStorePtr->mlCode;
+ const BYTE* ofCodePtr = seqStorePtr->ofCode;
+ size_t targetCBlockSize = cctxParams->targetCBlockSize;
+ size_t litSize, seqCount;
+ int writeLitEntropy = entropyMetadata->hufMetadata.hType == set_compressed;
+ int writeSeqEntropy = 1;
+ int lastSequence = 0;
+
+ DEBUGLOG(5, "ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)",
+ (unsigned)(lend-lp), (unsigned)(send-sstart));
+
+ litSize = 0;
+ seqCount = 0;
+ do {
+ size_t cBlockSizeEstimate = 0;
+ if (sstart == send) {
+ lastSequence = 1;
+ } else {
+ const seqDef* const sequence = sp + seqCount;
+ lastSequence = sequence == send - 1;
+ litSize += ZSTD_getSequenceLength(seqStorePtr, sequence).litLength;
+ seqCount++;
+ }
+ if (lastSequence) {
+ assert(lp <= lend);
+ assert(litSize <= (size_t)(lend - lp));
+ litSize = (size_t)(lend - lp);
+ }
+ /* I think there is an optimization opportunity here.
+ * Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful
+ * since it recalculates estimate from scratch.
+ * For example, it would recount literal distribution and symbol codes everytime.
+ */
+ cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount,
+ &nextCBlock->entropy, entropyMetadata,
+ workspace, wkspSize, writeLitEntropy, writeSeqEntropy);
+ if (cBlockSizeEstimate > targetCBlockSize || lastSequence) {
+ int litEntropyWritten = 0;
+ int seqEntropyWritten = 0;
+ const size_t decompressedSize = ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSequence);
+ const size_t cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
+ sp, seqCount,
+ lp, litSize,
+ llCodePtr, mlCodePtr, ofCodePtr,
+ cctxParams,
+ op, oend-op,
+ bmi2, writeLitEntropy, writeSeqEntropy,
+ &litEntropyWritten, &seqEntropyWritten,
+ lastBlock && lastSequence);
+ FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
+ if (cSize > 0 && cSize < decompressedSize) {
+ DEBUGLOG(5, "Committed the sub-block");
+ assert(ip + decompressedSize <= iend);
+ ip += decompressedSize;
+ sp += seqCount;
+ lp += litSize;
+ op += cSize;
+ llCodePtr += seqCount;
+ mlCodePtr += seqCount;
+ ofCodePtr += seqCount;
+ litSize = 0;
+ seqCount = 0;
+ /* Entropy only needs to be written once */
+ if (litEntropyWritten) {
+ writeLitEntropy = 0;
+ }
+ if (seqEntropyWritten) {
+ writeSeqEntropy = 0;
+ }
+ }
+ }
+ } while (!lastSequence);
+ if (writeLitEntropy) {
+ DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten");
+ memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf));
+ }
+ if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) {
+ /* If we haven't written our entropy tables, then we've violated our contract and
+ * must emit an uncompressed block.
+ */
+ DEBUGLOG(5, "ZSTD_compressSubBlock_multi has sequence entropy tables unwritten");
+ return 0;
+ }
+ if (ip < iend) {
+ size_t const cSize = ZSTD_noCompressBlock(op, oend - op, ip, iend - ip, lastBlock);
+ DEBUGLOG(5, "ZSTD_compressSubBlock_multi last sub-block uncompressed, %zu bytes", (size_t)(iend - ip));
+ FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
+ assert(cSize != 0);
+ op += cSize;
+ /* We have to regenerate the repcodes because we've skipped some sequences */
+ if (sp < send) {
+ seqDef const* seq;
+ repcodes_t rep;
+ memcpy(&rep, prevCBlock->rep, sizeof(rep));
+ for (seq = sstart; seq < sp; ++seq) {
+ rep = ZSTD_updateRep(rep.rep, seq->offset - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
+ }
+ memcpy(nextCBlock->rep, &rep, sizeof(rep));
+ }
+ }
+ DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed");
+ return op-ostart;
+}
+
+size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
+ void* dst, size_t dstCapacity,
+ void const* src, size_t srcSize,
+ unsigned lastBlock) {
+ ZSTD_entropyCTablesMetadata_t entropyMetadata;
+
+ FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(&zc->seqStore,
+ &zc->blockState.prevCBlock->entropy,
+ &zc->blockState.nextCBlock->entropy,
+ &zc->appliedParams,
+ &entropyMetadata,
+ zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
+
+ return ZSTD_compressSubBlock_multi(&zc->seqStore,
+ zc->blockState.prevCBlock,
+ zc->blockState.nextCBlock,
+ &entropyMetadata,
+ &zc->appliedParams,
+ dst, dstCapacity,
+ src, srcSize,
+ zc->bmi2, lastBlock,
+ zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */);
+}
diff --git a/zstd/lib/compress/zstd_compress_superblock.h b/zstd/lib/compress/zstd_compress_superblock.h
new file mode 100644
index 0000000..07f4cb1
--- /dev/null
+++ b/zstd/lib/compress/zstd_compress_superblock.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_COMPRESS_ADVANCED_H
+#define ZSTD_COMPRESS_ADVANCED_H
+
+/*-*************************************
+* Dependencies
+***************************************/
+
+#include "../zstd.h" /* ZSTD_CCtx */
+
+/*-*************************************
+* Target Compressed Block Size
+***************************************/
+
+/* ZSTD_compressSuperBlock() :
+ * Used to compress a super block when targetCBlockSize is being used.
+ * The given block will be compressed into multiple sub blocks that are around targetCBlockSize. */
+size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
+ void* dst, size_t dstCapacity,
+ void const* src, size_t srcSize,
+ unsigned lastBlock);
+
+#endif /* ZSTD_COMPRESS_ADVANCED_H */
diff --git a/zstd/lib/compress/zstd_cwksp.h b/zstd/lib/compress/zstd_cwksp.h
new file mode 100644
index 0000000..a25c926
--- /dev/null
+++ b/zstd/lib/compress/zstd_cwksp.h
@@ -0,0 +1,525 @@
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_CWKSP_H
+#define ZSTD_CWKSP_H
+
+/*-*************************************
+* Dependencies
+***************************************/
+#include "../common/zstd_internal.h"
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*-*************************************
+* Constants
+***************************************/
+
+/* Since the workspace is effectively its own little malloc implementation /
+ * arena, when we run under ASAN, we should similarly insert redzones between
+ * each internal element of the workspace, so ASAN will catch overruns that
+ * reach outside an object but that stay inside the workspace.
+ *
+ * This defines the size of that redzone.
+ */
+#ifndef ZSTD_CWKSP_ASAN_REDZONE_SIZE
+#define ZSTD_CWKSP_ASAN_REDZONE_SIZE 128
+#endif
+
+/*-*************************************
+* Structures
+***************************************/
+typedef enum {
+ ZSTD_cwksp_alloc_objects,
+ ZSTD_cwksp_alloc_buffers,
+ ZSTD_cwksp_alloc_aligned
+} ZSTD_cwksp_alloc_phase_e;
+
+/**
+ * Zstd fits all its internal datastructures into a single continuous buffer,
+ * so that it only needs to perform a single OS allocation (or so that a buffer
+ * can be provided to it and it can perform no allocations at all). This buffer
+ * is called the workspace.
+ *
+ * Several optimizations complicate that process of allocating memory ranges
+ * from this workspace for each internal datastructure:
+ *
+ * - These different internal datastructures have different setup requirements:
+ *
+ * - The static objects need to be cleared once and can then be trivially
+ * reused for each compression.
+ *
+ * - Various buffers don't need to be initialized at all--they are always
+ * written into before they're read.
+ *
+ * - The matchstate tables have a unique requirement that they don't need
+ * their memory to be totally cleared, but they do need the memory to have
+ * some bound, i.e., a guarantee that all values in the memory they've been
+ * allocated is less than some maximum value (which is the starting value
+ * for the indices that they will then use for compression). When this
+ * guarantee is provided to them, they can use the memory without any setup
+ * work. When it can't, they have to clear the area.
+ *
+ * - These buffers also have different alignment requirements.
+ *
+ * - We would like to reuse the objects in the workspace for multiple
+ * compressions without having to perform any expensive reallocation or
+ * reinitialization work.
+ *
+ * - We would like to be able to efficiently reuse the workspace across
+ * multiple compressions **even when the compression parameters change** and
+ * we need to resize some of the objects (where possible).
+ *
+ * To attempt to manage this buffer, given these constraints, the ZSTD_cwksp
+ * abstraction was created. It works as follows:
+ *
+ * Workspace Layout:
+ *
+ * [ ... workspace ... ]
+ * [objects][tables ... ->] free space [<- ... aligned][<- ... buffers]
+ *
+ * The various objects that live in the workspace are divided into the
+ * following categories, and are allocated separately:
+ *
+ * - Static objects: this is optionally the enclosing ZSTD_CCtx or ZSTD_CDict,
+ * so that literally everything fits in a single buffer. Note: if present,
+ * this must be the first object in the workspace, since ZSTD_free{CCtx,
+ * CDict}() rely on a pointer comparison to see whether one or two frees are
+ * required.
+ *
+ * - Fixed size objects: these are fixed-size, fixed-count objects that are
+ * nonetheless "dynamically" allocated in the workspace so that we can
+ * control how they're initialized separately from the broader ZSTD_CCtx.
+ * Examples:
+ * - Entropy Workspace
+ * - 2 x ZSTD_compressedBlockState_t
+ * - CDict dictionary contents
+ *
+ * - Tables: these are any of several different datastructures (hash tables,
+ * chain tables, binary trees) that all respect a common format: they are
+ * uint32_t arrays, all of whose values are between 0 and (nextSrc - base).
+ * Their sizes depend on the cparams.
+ *
+ * - Aligned: these buffers are used for various purposes that require 4 byte
+ * alignment, but don't require any initialization before they're used.
+ *
+ * - Buffers: these buffers are used for various purposes that don't require
+ * any alignment or initialization before they're used. This means they can
+ * be moved around at no cost for a new compression.
+ *
+ * Allocating Memory:
+ *
+ * The various types of objects must be allocated in order, so they can be
+ * correctly packed into the workspace buffer. That order is:
+ *
+ * 1. Objects
+ * 2. Buffers
+ * 3. Aligned
+ * 4. Tables
+ *
+ * Attempts to reserve objects of different types out of order will fail.
+ */
+typedef struct {
+ void* workspace;
+ void* workspaceEnd;
+
+ void* objectEnd;
+ void* tableEnd;
+ void* tableValidEnd;
+ void* allocStart;
+
+ int allocFailed;
+ int workspaceOversizedDuration;
+ ZSTD_cwksp_alloc_phase_e phase;
+} ZSTD_cwksp;
+
+/*-*************************************
+* Functions
+***************************************/
+
+MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws);
+
+MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
+ (void)ws;
+ assert(ws->workspace <= ws->objectEnd);
+ assert(ws->objectEnd <= ws->tableEnd);
+ assert(ws->objectEnd <= ws->tableValidEnd);
+ assert(ws->tableEnd <= ws->allocStart);
+ assert(ws->tableValidEnd <= ws->allocStart);
+ assert(ws->allocStart <= ws->workspaceEnd);
+}
+
+/**
+ * Align must be a power of 2.
+ */
+MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) {
+ size_t const mask = align - 1;
+ assert((align & mask) == 0);
+ return (size + mask) & ~mask;
+}
+
+/**
+ * Use this to determine how much space in the workspace we will consume to
+ * allocate this object. (Normally it should be exactly the size of the object,
+ * but under special conditions, like ASAN, where we pad each object, it might
+ * be larger.)
+ *
+ * Since tables aren't currently redzoned, you don't need to call through this
+ * to figure out how much space you need for the matchState tables. Everything
+ * else is though.
+ */
+MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) {
+#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
+ return size + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
+#else
+ return size;
+#endif
+}
+
+MEM_STATIC void ZSTD_cwksp_internal_advance_phase(
+ ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) {
+ assert(phase >= ws->phase);
+ if (phase > ws->phase) {
+ if (ws->phase < ZSTD_cwksp_alloc_buffers &&
+ phase >= ZSTD_cwksp_alloc_buffers) {
+ ws->tableValidEnd = ws->objectEnd;
+ }
+ if (ws->phase < ZSTD_cwksp_alloc_aligned &&
+ phase >= ZSTD_cwksp_alloc_aligned) {
+ /* If unaligned allocations down from a too-large top have left us
+ * unaligned, we need to realign our alloc ptr. Technically, this
+ * can consume space that is unaccounted for in the neededSpace
+ * calculation. However, I believe this can only happen when the
+ * workspace is too large, and specifically when it is too large
+ * by a larger margin than the space that will be consumed. */
+ /* TODO: cleaner, compiler warning friendly way to do this??? */
+ ws->allocStart = (BYTE*)ws->allocStart - ((size_t)ws->allocStart & (sizeof(U32)-1));
+ if (ws->allocStart < ws->tableValidEnd) {
+ ws->tableValidEnd = ws->allocStart;
+ }
+ }
+ ws->phase = phase;
+ }
+}
+
+/**
+ * Returns whether this object/buffer/etc was allocated in this workspace.
+ */
+MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) {
+ return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd);
+}
+
+/**
+ * Internal function. Do not use directly.
+ */
+MEM_STATIC void* ZSTD_cwksp_reserve_internal(
+ ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) {
+ void* alloc;
+ void* bottom = ws->tableEnd;
+ ZSTD_cwksp_internal_advance_phase(ws, phase);
+ alloc = (BYTE *)ws->allocStart - bytes;
+
+#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
+ /* over-reserve space */
+ alloc = (BYTE *)alloc - 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
+#endif
+
+ DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining",
+ alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
+ ZSTD_cwksp_assert_internal_consistency(ws);
+ assert(alloc >= bottom);
+ if (alloc < bottom) {
+ DEBUGLOG(4, "cwksp: alloc failed!");
+ ws->allocFailed = 1;
+ return NULL;
+ }
+ if (alloc < ws->tableValidEnd) {
+ ws->tableValidEnd = alloc;
+ }
+ ws->allocStart = alloc;
+
+#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
+ /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on
+ * either size. */
+ alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
+ __asan_unpoison_memory_region(alloc, bytes);
+#endif
+
+ return alloc;
+}
+
+/**
+ * Reserves and returns unaligned memory.
+ */
+MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) {
+ return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers);
+}
+
+/**
+ * Reserves and returns memory sized on and aligned on sizeof(unsigned).
+ */
+MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) {
+ assert((bytes & (sizeof(U32)-1)) == 0);
+ return ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, sizeof(U32)), ZSTD_cwksp_alloc_aligned);
+}
+
+/**
+ * Aligned on sizeof(unsigned). These buffers have the special property that
+ * their values remain constrained, allowing us to re-use them without
+ * memset()-ing them.
+ */
+MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) {
+ const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned;
+ void* alloc = ws->tableEnd;
+ void* end = (BYTE *)alloc + bytes;
+ void* top = ws->allocStart;
+
+ DEBUGLOG(5, "cwksp: reserving %p table %zd bytes, %zd bytes remaining",
+ alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
+ assert((bytes & (sizeof(U32)-1)) == 0);
+ ZSTD_cwksp_internal_advance_phase(ws, phase);
+ ZSTD_cwksp_assert_internal_consistency(ws);
+ assert(end <= top);
+ if (end > top) {
+ DEBUGLOG(4, "cwksp: table alloc failed!");
+ ws->allocFailed = 1;
+ return NULL;
+ }
+ ws->tableEnd = end;
+
+#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
+ __asan_unpoison_memory_region(alloc, bytes);
+#endif
+
+ return alloc;
+}
+
+/**
+ * Aligned on sizeof(void*).
+ */
+MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) {
+ size_t roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*));
+ void* alloc = ws->objectEnd;
+ void* end = (BYTE*)alloc + roundedBytes;
+
+#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
+ /* over-reserve space */
+ end = (BYTE *)end + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
+#endif
+
+ DEBUGLOG(5,
+ "cwksp: reserving %p object %zd bytes (rounded to %zd), %zd bytes remaining",
+ alloc, bytes, roundedBytes, ZSTD_cwksp_available_space(ws) - roundedBytes);
+ assert(((size_t)alloc & (sizeof(void*)-1)) == 0);
+ assert((bytes & (sizeof(void*)-1)) == 0);
+ ZSTD_cwksp_assert_internal_consistency(ws);
+ /* we must be in the first phase, no advance is possible */
+ if (ws->phase != ZSTD_cwksp_alloc_objects || end > ws->workspaceEnd) {
+ DEBUGLOG(4, "cwksp: object alloc failed!");
+ ws->allocFailed = 1;
+ return NULL;
+ }
+ ws->objectEnd = end;
+ ws->tableEnd = end;
+ ws->tableValidEnd = end;
+
+#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
+ /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on
+ * either size. */
+ alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
+ __asan_unpoison_memory_region(alloc, bytes);
+#endif
+
+ return alloc;
+}
+
+MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) {
+ DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty");
+
+#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
+ /* To validate that the table re-use logic is sound, and that we don't
+ * access table space that we haven't cleaned, we re-"poison" the table
+ * space every time we mark it dirty. */
+ {
+ size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd;
+ assert(__msan_test_shadow(ws->objectEnd, size) == -1);
+ __msan_poison(ws->objectEnd, size);
+ }
+#endif
+
+ assert(ws->tableValidEnd >= ws->objectEnd);
+ assert(ws->tableValidEnd <= ws->allocStart);
+ ws->tableValidEnd = ws->objectEnd;
+ ZSTD_cwksp_assert_internal_consistency(ws);
+}
+
+MEM_STATIC void ZSTD_cwksp_mark_tables_clean(ZSTD_cwksp* ws) {
+ DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_clean");
+ assert(ws->tableValidEnd >= ws->objectEnd);
+ assert(ws->tableValidEnd <= ws->allocStart);
+ if (ws->tableValidEnd < ws->tableEnd) {
+ ws->tableValidEnd = ws->tableEnd;
+ }
+ ZSTD_cwksp_assert_internal_consistency(ws);
+}
+
+/**
+ * Zero the part of the allocated tables not already marked clean.
+ */
+MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) {
+ DEBUGLOG(4, "cwksp: ZSTD_cwksp_clean_tables");
+ assert(ws->tableValidEnd >= ws->objectEnd);
+ assert(ws->tableValidEnd <= ws->allocStart);
+ if (ws->tableValidEnd < ws->tableEnd) {
+ memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd);
+ }
+ ZSTD_cwksp_mark_tables_clean(ws);
+}
+
+/**
+ * Invalidates table allocations.
+ * All other allocations remain valid.
+ */
+MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) {
+ DEBUGLOG(4, "cwksp: clearing tables!");
+
+#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
+ {
+ size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd;
+ __asan_poison_memory_region(ws->objectEnd, size);
+ }
+#endif
+
+ ws->tableEnd = ws->objectEnd;
+ ZSTD_cwksp_assert_internal_consistency(ws);
+}
+
+/**
+ * Invalidates all buffer, aligned, and table allocations.
+ * Object allocations remain valid.
+ */
+MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
+ DEBUGLOG(4, "cwksp: clearing!");
+
+#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
+ /* To validate that the context re-use logic is sound, and that we don't
+ * access stuff that this compression hasn't initialized, we re-"poison"
+ * the workspace (or at least the non-static, non-table parts of it)
+ * every time we start a new compression. */
+ {
+ size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->tableValidEnd;
+ __msan_poison(ws->tableValidEnd, size);
+ }
+#endif
+
+#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
+ {
+ size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->objectEnd;
+ __asan_poison_memory_region(ws->objectEnd, size);
+ }
+#endif
+
+ ws->tableEnd = ws->objectEnd;
+ ws->allocStart = ws->workspaceEnd;
+ ws->allocFailed = 0;
+ if (ws->phase > ZSTD_cwksp_alloc_buffers) {
+ ws->phase = ZSTD_cwksp_alloc_buffers;
+ }
+ ZSTD_cwksp_assert_internal_consistency(ws);
+}
+
+/**
+ * The provided workspace takes ownership of the buffer [start, start+size).
+ * Any existing values in the workspace are ignored (the previously managed
+ * buffer, if present, must be separately freed).
+ */
+MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size) {
+ DEBUGLOG(4, "cwksp: init'ing workspace with %zd bytes", size);
+ assert(((size_t)start & (sizeof(void*)-1)) == 0); /* ensure correct alignment */
+ ws->workspace = start;
+ ws->workspaceEnd = (BYTE*)start + size;
+ ws->objectEnd = ws->workspace;
+ ws->tableValidEnd = ws->objectEnd;
+ ws->phase = ZSTD_cwksp_alloc_objects;
+ ZSTD_cwksp_clear(ws);
+ ws->workspaceOversizedDuration = 0;
+ ZSTD_cwksp_assert_internal_consistency(ws);
+}
+
+MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem customMem) {
+ void* workspace = ZSTD_malloc(size, customMem);
+ DEBUGLOG(4, "cwksp: creating new workspace with %zd bytes", size);
+ RETURN_ERROR_IF(workspace == NULL, memory_allocation, "NULL pointer!");
+ ZSTD_cwksp_init(ws, workspace, size);
+ return 0;
+}
+
+MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) {
+ void *ptr = ws->workspace;
+ DEBUGLOG(4, "cwksp: freeing workspace");
+ memset(ws, 0, sizeof(ZSTD_cwksp));
+ ZSTD_free(ptr, customMem);
+}
+
+/**
+ * Moves the management of a workspace from one cwksp to another. The src cwksp
+ * is left in an invalid state (src must be re-init()'ed before its used again).
+ */
+MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) {
+ *dst = *src;
+ memset(src, 0, sizeof(ZSTD_cwksp));
+}
+
+MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) {
+ return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace);
+}
+
+MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
+ return ws->allocFailed;
+}
+
+/*-*************************************
+* Functions Checking Free Space
+***************************************/
+
+MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws) {
+ return (size_t)((BYTE*)ws->allocStart - (BYTE*)ws->tableEnd);
+}
+
+MEM_STATIC int ZSTD_cwksp_check_available(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
+ return ZSTD_cwksp_available_space(ws) >= additionalNeededSpace;
+}
+
+MEM_STATIC int ZSTD_cwksp_check_too_large(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
+ return ZSTD_cwksp_check_available(
+ ws, additionalNeededSpace * ZSTD_WORKSPACETOOLARGE_FACTOR);
+}
+
+MEM_STATIC int ZSTD_cwksp_check_wasteful(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
+ return ZSTD_cwksp_check_too_large(ws, additionalNeededSpace)
+ && ws->workspaceOversizedDuration > ZSTD_WORKSPACETOOLARGE_MAXDURATION;
+}
+
+MEM_STATIC void ZSTD_cwksp_bump_oversized_duration(
+ ZSTD_cwksp* ws, size_t additionalNeededSpace) {
+ if (ZSTD_cwksp_check_too_large(ws, additionalNeededSpace)) {
+ ws->workspaceOversizedDuration++;
+ } else {
+ ws->workspaceOversizedDuration = 0;
+ }
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_CWKSP_H */
diff --git a/zstd/lib/compress/zstd_double_fast.c b/zstd/lib/compress/zstd_double_fast.c
index 47faf6d..27eed66 100644
--- a/zstd/lib/compress/zstd_double_fast.c
+++ b/zstd/lib/compress/zstd_double_fast.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -43,8 +43,7 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
/* Only load extra positions for ZSTD_dtlm_full */
if (dtlm == ZSTD_dtlm_fast)
break;
- }
- }
+ } }
}
@@ -63,7 +62,9 @@ size_t ZSTD_compressBlock_doubleFast_generic(
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart;
- const U32 prefixLowestIndex = ms->window.dictLimit;
+ const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
+ /* presumes that, if there is a dictionary, it must be using Attach mode */
+ const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
const BYTE* const prefixLowest = base + prefixLowestIndex;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE;
@@ -93,14 +94,23 @@ size_t ZSTD_compressBlock_doubleFast_generic(
dictCParams->hashLog : hBitsL;
const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ?
dictCParams->chainLog : hBitsS;
- const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictStart);
+ const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart));
+
+ DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic");
assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);
+ /* if a dictionary is attached, it must be within window range */
+ if (dictMode == ZSTD_dictMatchState) {
+ assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
+ }
+
/* init */
ip += (dictAndPrefixLength == 0);
if (dictMode == ZSTD_noDict) {
- U32 const maxRep = (U32)(ip - prefixLowest);
+ U32 const current = (U32)(ip - base);
+ U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
+ U32 const maxRep = current - windowLow;
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
}
@@ -138,7 +148,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
ip++;
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
goto _match_stored;
}
@@ -147,7 +157,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
&& ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
ip++;
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
goto _match_stored;
}
@@ -170,8 +180,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
offset = (U32)(current - dictMatchIndexL - dictIndexDelta);
while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
goto _match_found;
- }
- }
+ } }
if (matchIndexS > prefixLowestIndex) {
/* check prefix short match */
@@ -186,16 +195,17 @@ size_t ZSTD_compressBlock_doubleFast_generic(
if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) {
goto _search_next_long;
- }
- }
+ } }
ip += ((ip-anchor) >> kSearchStrength) + 1;
+#if defined(__aarch64__)
+ PREFETCH_L1(ip+256);
+#endif
continue;
_search_next_long:
- {
- size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
+ { size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
U32 const matchIndexL3 = hashLong[hl3];
const BYTE* matchL3 = base + matchIndexL3;
@@ -221,9 +231,7 @@ _search_next_long:
offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta);
while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
goto _match_found;
- }
- }
- }
+ } } }
/* if no long +1 match, explore the short match we found */
if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) {
@@ -242,7 +250,7 @@ _match_found:
offset_2 = offset_1;
offset_1 = offset;
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
_match_stored:
/* match found */
@@ -250,11 +258,14 @@ _match_stored:
anchor = ip;
if (ip <= ilimit) {
- /* Fill Table */
- hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] =
- hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; /* here because current+2 could be > iend-8 */
- hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] =
- hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
+ /* Complementary insertion */
+ /* done after iLimit test, as candidates could be > iend-8 */
+ { U32 const indexToInsert = current+2;
+ hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
+ hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
+ hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
+ hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
+ }
/* check immediate repcode */
if (dictMode == ZSTD_dictMatchState) {
@@ -263,14 +274,14 @@ _match_stored:
U32 const repIndex2 = current2 - offset_2;
const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState
&& repIndex2 < prefixLowestIndex ?
- dictBase - dictIndexDelta + repIndex2 :
+ dictBase + repIndex2 - dictIndexDelta :
base + repIndex2;
if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
- ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
ip += repLength2;
@@ -278,8 +289,7 @@ _match_stored:
continue;
}
break;
- }
- }
+ } }
if (dictMode == ZSTD_noDict) {
while ( (ip <= ilimit)
@@ -290,18 +300,19 @@ _match_stored:
U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
- ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH);
ip += rLength;
anchor = ip;
continue; /* faster when present ... (?) */
- } } } }
+ } } }
+ } /* while (ip < ilimit) */
/* save reps for next block */
rep[0] = offset_1 ? offset_1 : offsetSaved;
rep[1] = offset_2 ? offset_2 : offsetSaved;
/* Return the last literals size */
- return iend - anchor;
+ return (size_t)(iend - anchor);
}
@@ -360,10 +371,13 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
const BYTE* anchor = istart;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - 8;
- const U32 prefixStartIndex = ms->window.dictLimit;
const BYTE* const base = ms->window.base;
+ const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
+ const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
+ const U32 dictStartIndex = lowLimit;
+ const U32 dictLimit = ms->window.dictLimit;
+ const U32 prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit;
const BYTE* const prefixStart = base + prefixStartIndex;
- const U32 dictStartIndex = ms->window.lowLimit;
const BYTE* const dictBase = ms->window.dictBase;
const BYTE* const dictStart = dictBase + dictStartIndex;
const BYTE* const dictEnd = dictBase + prefixStartIndex;
@@ -371,6 +385,10 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize);
+ /* if extDict is invalidated due to maxDistance, switch to "regular" variant */
+ if (prefixStartIndex == dictStartIndex)
+ return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict);
+
/* Search Loop */
while (ip < ilimit) { /* < instead of <=, because (ip+1) */
const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
@@ -396,7 +414,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
ip++;
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
} else {
if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
@@ -407,7 +425,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
offset_2 = offset_1;
offset_1 = offset;
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
} else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
@@ -432,23 +450,27 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
}
offset_2 = offset_1;
offset_1 = offset;
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
} else {
ip += ((ip-anchor) >> kSearchStrength) + 1;
continue;
} }
- /* found a match : store it */
+ /* move to next sequence start */
ip += mLength;
anchor = ip;
if (ip <= ilimit) {
- /* Fill Table */
- hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2;
- hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = current+2;
- hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
- hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
+ /* Complementary insertion */
+ /* done after iLimit test, as candidates could be > iend-8 */
+ { U32 const indexToInsert = current+2;
+ hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
+ hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
+ hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
+ hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
+ }
+
/* check immediate repcode */
while (ip <= ilimit) {
U32 const current2 = (U32)(ip-base);
@@ -460,7 +482,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
- ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
ip += repLength2;
@@ -475,7 +497,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
rep[1] = offset_2;
/* Return the last literals size */
- return iend - anchor;
+ return (size_t)(iend - anchor);
}
diff --git a/zstd/lib/compress/zstd_double_fast.h b/zstd/lib/compress/zstd_double_fast.h
index 4fa31ac..14d944d 100644
--- a/zstd/lib/compress/zstd_double_fast.h
+++ b/zstd/lib/compress/zstd_double_fast.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -15,7 +15,7 @@
extern "C" {
#endif
-#include "mem.h" /* U32 */
+#include "../common/mem.h" /* U32 */
#include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */
void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
diff --git a/zstd/lib/compress/zstd_fast.c b/zstd/lib/compress/zstd_fast.c
index ed997b4..85a3a7a 100644
--- a/zstd/lib/compress/zstd_fast.c
+++ b/zstd/lib/compress/zstd_fast.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -8,12 +8,13 @@
* You may select, at your option, one of the above-listed licenses.
*/
-#include "zstd_compress_internal.h"
+#include "zstd_compress_internal.h" /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */
#include "zstd_fast.h"
void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
- void const* end, ZSTD_dictTableLoadMethod_e dtlm)
+ const void* const end,
+ ZSTD_dictTableLoadMethod_e dtlm)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32* const hashTable = ms->hashTable;
@@ -41,8 +42,9 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
} } } }
}
-FORCE_INLINE_TEMPLATE
-size_t ZSTD_compressBlock_fast_generic(
+
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_compressBlock_fast_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize,
U32 const mls)
@@ -58,7 +60,8 @@ size_t ZSTD_compressBlock_fast_generic(
const BYTE* ip0 = istart;
const BYTE* ip1;
const BYTE* anchor = istart;
- const U32 prefixStartIndex = ms->window.dictLimit;
+ const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
+ const U32 prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
const BYTE* const prefixStart = base + prefixStartIndex;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE;
@@ -66,15 +69,24 @@ size_t ZSTD_compressBlock_fast_generic(
U32 offsetSaved = 0;
/* init */
+ DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
ip0 += (ip0 == prefixStart);
ip1 = ip0 + 1;
- {
- U32 const maxRep = (U32)(ip0 - prefixStart);
+ { U32 const current = (U32)(ip0 - base);
+ U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
+ U32 const maxRep = current - windowLow;
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
}
/* Main Search Loop */
+#ifdef __INTEL_COMPILER
+ /* From intel 'The vector pragma indicates that the loop should be
+ * vectorized if it is legal to do so'. Can be used together with
+ * #pragma ivdep (but have opted to exclude that because intel
+ * warns against using it).*/
+ #pragma vector always
+#endif
while (ip1 < ilimit) { /* < instead of <=, because check at ip0+2 */
size_t mLength;
BYTE const* ip2 = ip0 + 2;
@@ -86,19 +98,25 @@ size_t ZSTD_compressBlock_fast_generic(
U32 const current1 = (U32)(ip1-base);
U32 const matchIndex0 = hashTable[h0];
U32 const matchIndex1 = hashTable[h1];
- BYTE const* repMatch = ip2-offset_1;
+ BYTE const* repMatch = ip2 - offset_1;
const BYTE* match0 = base + matchIndex0;
const BYTE* match1 = base + matchIndex1;
U32 offcode;
+
+#if defined(__aarch64__)
+ PREFETCH_L1(ip0+256);
+#endif
+
hashTable[h0] = current0; /* update hash table */
hashTable[h1] = current1; /* update hash table */
assert(ip0 + 1 == ip1);
if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) {
- mLength = ip2[-1] == repMatch[-1] ? 1 : 0;
+ mLength = (ip2[-1] == repMatch[-1]) ? 1 : 0;
ip0 = ip2 - mLength;
match0 = repMatch - mLength;
+ mLength += 4;
offcode = 0;
goto _match;
}
@@ -112,8 +130,7 @@ size_t ZSTD_compressBlock_fast_generic(
match0 = match1;
goto _offset;
}
- {
- size_t const step = ((ip0-anchor) >> (kSearchStrength - 1)) + stepSize;
+ { size_t const step = ((size_t)(ip0-anchor) >> (kSearchStrength - 1)) + stepSize;
assert(step >= 2);
ip0 += step;
ip1 += step;
@@ -124,19 +141,18 @@ _offset: /* Requires: ip0, match0 */
offset_2 = offset_1;
offset_1 = (U32)(ip0-match0);
offcode = offset_1 + ZSTD_REP_MOVE;
- mLength = 0;
+ mLength = 4;
/* Count the backwards match length */
while (((ip0>anchor) & (match0>prefixStart))
&& (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */
_match: /* Requires: ip0, match0, offcode */
/* Count the forward length */
- mLength += ZSTD_count(ip0+mLength+4, match0+mLength+4, iend) + 4;
- ZSTD_storeSeq(seqStore, ip0-anchor, anchor, offcode, mLength-MINMATCH);
+ mLength += ZSTD_count(ip0+mLength, match0+mLength, iend);
+ ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH);
/* match found */
ip0 += mLength;
anchor = ip0;
- ip1 = ip0 + 1;
if (ip0 <= ilimit) {
/* Fill Table */
@@ -144,20 +160,18 @@ _match: /* Requires: ip0, match0, offcode */
hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */
hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
- while ( (ip0 <= ilimit)
- && ( (offset_2>0)
- & (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) )) {
- /* store sequence */
- size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4;
- U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
- hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
- ip0 += rLength;
- ip1 = ip0 + 1;
- ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
- anchor = ip0;
- continue; /* faster when present (confirmed on gcc-8) ... (?) */
- }
- }
+ if (offset_2 > 0) { /* offset_2==0 means offset_2 is invalidated */
+ while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) {
+ /* store sequence */
+ size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4;
+ { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
+ hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
+ ip0 += rLength;
+ ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH);
+ anchor = ip0;
+ continue; /* faster when present (confirmed on gcc-8) ... (?) */
+ } } }
+ ip1 = ip0 + 1;
}
/* save reps for next block */
@@ -165,7 +179,7 @@ _match: /* Requires: ip0, match0, offcode */
rep[1] = offset_2 ? offset_2 : offsetSaved;
/* Return the last literals size */
- return iend - anchor;
+ return (size_t)(iend - anchor);
}
@@ -173,8 +187,7 @@ size_t ZSTD_compressBlock_fast(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- ZSTD_compressionParameters const* cParams = &ms->cParams;
- U32 const mls = cParams->minMatch;
+ U32 const mls = ms->cParams.minMatch;
assert(ms->dictMatchState == NULL);
switch(mls)
{
@@ -222,11 +235,19 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart);
const U32 dictHLog = dictCParams->hashLog;
- /* otherwise, we would get index underflow when translating a dict index
- * into a local index */
+ /* if a dictionary is still attached, it necessarily means that
+ * it is within window size. So we just check it. */
+ const U32 maxDistance = 1U << cParams->windowLog;
+ const U32 endIndex = (U32)((size_t)(ip - base) + srcSize);
+ assert(endIndex - prefixStartIndex <= maxDistance);
+ (void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */
+
+ /* ensure there will be no no underflow
+ * when translating a dict index into a local index */
assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
/* init */
+ DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic");
ip += (dictAndPrefixLength == 0);
/* dictMatchState repCode checks don't currently handle repCode == 0
* disabling. */
@@ -251,7 +272,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
ip++;
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
} else if ( (matchIndex <= prefixStartIndex) ) {
size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
U32 const dictMatchIndex = dictHashTable[dictHash];
@@ -271,7 +292,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
} /* catch up */
offset_2 = offset_1;
offset_1 = offset;
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
}
} else if (MEM_read32(match) != MEM_read32(ip)) {
/* it's not a match, and we're not going to check the dictionary */
@@ -286,7 +307,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
&& (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
offset_2 = offset_1;
offset_1 = offset;
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
}
/* match found */
@@ -311,7 +332,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
- ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
ip += repLength2;
anchor = ip;
@@ -327,15 +348,14 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
rep[1] = offset_2 ? offset_2 : offsetSaved;
/* Return the last literals size */
- return iend - anchor;
+ return (size_t)(iend - anchor);
}
size_t ZSTD_compressBlock_fast_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- ZSTD_compressionParameters const* cParams = &ms->cParams;
- U32 const mls = cParams->minMatch;
+ U32 const mls = ms->cParams.minMatch;
assert(ms->dictMatchState != NULL);
switch(mls)
{
@@ -366,15 +386,24 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart;
- const U32 dictStartIndex = ms->window.lowLimit;
+ const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
+ const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
+ const U32 dictStartIndex = lowLimit;
const BYTE* const dictStart = dictBase + dictStartIndex;
- const U32 prefixStartIndex = ms->window.dictLimit;
+ const U32 dictLimit = ms->window.dictLimit;
+ const U32 prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit;
const BYTE* const prefixStart = base + prefixStartIndex;
const BYTE* const dictEnd = dictBase + prefixStartIndex;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - 8;
U32 offset_1=rep[0], offset_2=rep[1];
+ DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1);
+
+ /* switch to "regular" variant if extDict is invalidated due to maxDistance */
+ if (prefixStartIndex == dictStartIndex)
+ return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls);
+
/* Search Loop */
while (ip < ilimit) { /* < instead of <=, because (ip+1) */
const size_t h = ZSTD_hashPtr(ip, hlog, mls);
@@ -385,16 +414,18 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
const U32 repIndex = current + 1 - offset_1;
const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
const BYTE* const repMatch = repBase + repIndex;
- size_t mLength;
hashTable[h] = current; /* update hash table */
+ DEBUGLOG(7, "offset_1 = %u , current = %u", offset_1, current);
assert(offset_1 <= current +1); /* check repIndex */
if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex))
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
- const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
- mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
+ const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
+ size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
ip++;
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength-MINMATCH);
+ ip += rLength;
+ anchor = ip;
} else {
if ( (matchIndex < dictStartIndex) ||
(MEM_read32(match) != MEM_read32(ip)) ) {
@@ -402,21 +433,17 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
ip += ((ip-anchor) >> kSearchStrength) + stepSize;
continue;
}
- { const BYTE* matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
- const BYTE* lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
- U32 offset;
- mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
+ { const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
+ const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
+ U32 const offset = current - matchIndex;
+ size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
- offset = current - matchIndex;
- offset_2 = offset_1;
- offset_1 = offset;
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+ offset_2 = offset_1; offset_1 = offset; /* update offset history */
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+ ip += mLength;
+ anchor = ip;
} }
- /* found a match : store it */
- ip += mLength;
- anchor = ip;
-
if (ip <= ilimit) {
/* Fill Table */
hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2;
@@ -425,13 +452,13 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
while (ip <= ilimit) {
U32 const current2 = (U32)(ip-base);
U32 const repIndex2 = current2 - offset_2;
- const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
+ const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex)) /* intentional overflow */
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
- U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
- ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
+ { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */
+ ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2-MINMATCH);
hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
ip += repLength2;
anchor = ip;
@@ -445,7 +472,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
rep[1] = offset_2;
/* Return the last literals size */
- return iend - anchor;
+ return (size_t)(iend - anchor);
}
@@ -453,8 +480,7 @@ size_t ZSTD_compressBlock_fast_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- ZSTD_compressionParameters const* cParams = &ms->cParams;
- U32 const mls = cParams->minMatch;
+ U32 const mls = ms->cParams.minMatch;
switch(mls)
{
default: /* includes case 3 */
diff --git a/zstd/lib/compress/zstd_fast.h b/zstd/lib/compress/zstd_fast.h
index b74a88c..cf6aaa8 100644
--- a/zstd/lib/compress/zstd_fast.h
+++ b/zstd/lib/compress/zstd_fast.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -15,7 +15,7 @@
extern "C" {
#endif
-#include "mem.h" /* U32 */
+#include "../common/mem.h" /* U32 */
#include "zstd_compress_internal.h"
void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
diff --git a/zstd/lib/compress/zstd_lazy.c b/zstd/lib/compress/zstd_lazy.c
index 53f998a..4cf5c88 100644
--- a/zstd/lib/compress/zstd_lazy.c
+++ b/zstd/lib/compress/zstd_lazy.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -83,7 +83,10 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
U32* largerPtr = smallerPtr + 1;
U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
U32 dummy32; /* to be nullified at the end */
- U32 const windowLow = ms->window.lowLimit;
+ U32 const windowValid = ms->window.lowLimit;
+ U32 const maxDistance = 1U << cParams->windowLog;
+ U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;
+
DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
current, dictLimit, windowLow);
@@ -239,7 +242,7 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
const BYTE* const base = ms->window.base;
U32 const current = (U32)(ip-base);
- U32 const windowLow = ms->window.lowLimit;
+ U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog);
U32* const bt = ms->chainTable;
U32 const btLog = cParams->chainLog - 1;
@@ -490,8 +493,12 @@ size_t ZSTD_HcFindBestMatch_generic (
const U32 dictLimit = ms->window.dictLimit;
const BYTE* const prefixStart = base + dictLimit;
const BYTE* const dictEnd = dictBase + dictLimit;
- const U32 lowLimit = ms->window.lowLimit;
const U32 current = (U32)(ip-base);
+ const U32 maxDistance = 1U << cParams->windowLog;
+ const U32 lowestValid = ms->window.lowLimit;
+ const U32 withinMaxDistance = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
+ const U32 isDictionary = (ms->loadedDictEnd != 0);
+ const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
const U32 minChain = current > chainSize ? current - chainSize : 0;
U32 nbAttempts = 1U << cParams->searchLog;
size_t ml=4-1;
@@ -612,12 +619,14 @@ FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
/* *******************************
* Common parser - lazy strategy
*********************************/
-FORCE_INLINE_TEMPLATE
-size_t ZSTD_compressBlock_lazy_generic(
+typedef enum { search_hashChain, search_binaryTree } searchMethod_e;
+
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_compressBlock_lazy_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore,
U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize,
- const U32 searchMethod, const U32 depth,
+ const searchMethod_e searchMethod, const U32 depth,
ZSTD_dictMode_e const dictMode)
{
const BYTE* const istart = (const BYTE*)src;
@@ -633,8 +642,10 @@ size_t ZSTD_compressBlock_lazy_generic(
ZSTD_matchState_t* ms,
const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ?
- (searchMethod ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) :
- (searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS);
+ (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS
+ : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) :
+ (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_selectMLS
+ : ZSTD_HcFindBestMatch_selectMLS);
U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
const ZSTD_matchState_t* const dms = ms->dictMatchState;
@@ -649,13 +660,16 @@ size_t ZSTD_compressBlock_lazy_generic(
const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
prefixLowestIndex - (U32)(dictEnd - dictBase) :
0;
- const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictLowest);
+ const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));
+
+ DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode);
/* init */
ip += (dictAndPrefixLength == 0);
- ms->nextToUpdate3 = ms->nextToUpdate;
if (dictMode == ZSTD_noDict) {
- U32 const maxRep = (U32)(ip - prefixLowest);
+ U32 const current = (U32)(ip - base);
+ U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, ms->cParams.windowLog);
+ U32 const maxRep = current - windowLow;
if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
}
@@ -667,6 +681,12 @@ size_t ZSTD_compressBlock_lazy_generic(
}
/* Match Loop */
+#if defined(__GNUC__) && defined(__x86_64__)
+ /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
+ * code alignment is perturbed. To fix the instability align the loop on 32-bytes.
+ */
+ __asm__(".p2align 5");
+#endif
while (ip < ilimit) {
size_t matchLength=0;
size_t offset=0;
@@ -800,7 +820,7 @@ size_t ZSTD_compressBlock_lazy_generic(
/* store sequence */
_storeSequence:
{ size_t const litLength = start - anchor;
- ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
anchor = ip = start + matchLength;
}
@@ -818,7 +838,7 @@ _storeSequence:
const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */
- ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
ip += matchLength;
anchor = ip;
continue;
@@ -833,7 +853,7 @@ _storeSequence:
/* store sequence */
matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
- ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
ip += matchLength;
anchor = ip;
continue; /* faster when present ... (?) */
@@ -844,7 +864,7 @@ _storeSequence:
rep[1] = offset_2 ? offset_2 : savedOffset;
/* Return the last literals size */
- return iend - anchor;
+ return (size_t)(iend - anchor);
}
@@ -852,56 +872,56 @@ size_t ZSTD_compressBlock_btlazy2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 1, 2, ZSTD_noDict);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
}
size_t ZSTD_compressBlock_lazy2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 2, ZSTD_noDict);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
}
size_t ZSTD_compressBlock_lazy(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 1, ZSTD_noDict);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
}
size_t ZSTD_compressBlock_greedy(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 0, ZSTD_noDict);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
}
size_t ZSTD_compressBlock_btlazy2_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 1, 2, ZSTD_dictMatchState);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
}
size_t ZSTD_compressBlock_lazy2_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 2, ZSTD_dictMatchState);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
}
size_t ZSTD_compressBlock_lazy_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 1, ZSTD_dictMatchState);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
}
size_t ZSTD_compressBlock_greedy_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 0, ZSTD_dictMatchState);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
}
@@ -910,7 +930,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore,
U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize,
- const U32 searchMethod, const U32 depth)
+ const searchMethod_e searchMethod, const U32 depth)
{
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
@@ -919,24 +939,31 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
const BYTE* const ilimit = iend - 8;
const BYTE* const base = ms->window.base;
const U32 dictLimit = ms->window.dictLimit;
- const U32 lowestIndex = ms->window.lowLimit;
const BYTE* const prefixStart = base + dictLimit;
const BYTE* const dictBase = ms->window.dictBase;
const BYTE* const dictEnd = dictBase + dictLimit;
- const BYTE* const dictStart = dictBase + lowestIndex;
+ const BYTE* const dictStart = dictBase + ms->window.lowLimit;
+ const U32 windowLog = ms->cParams.windowLog;
typedef size_t (*searchMax_f)(
ZSTD_matchState_t* ms,
const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
- searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
+ searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
U32 offset_1 = rep[0], offset_2 = rep[1];
+ DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic");
+
/* init */
- ms->nextToUpdate3 = ms->nextToUpdate;
ip += (ip == prefixStart);
/* Match Loop */
+#if defined(__GNUC__) && defined(__x86_64__)
+ /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
+ * code alignment is perturbed. To fix the instability align the loop on 32-bytes.
+ */
+ __asm__(".p2align 5");
+#endif
while (ip < ilimit) {
size_t matchLength=0;
size_t offset=0;
@@ -944,10 +971,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
U32 current = (U32)(ip-base);
/* check repCode */
- { const U32 repIndex = (U32)(current+1 - offset_1);
+ { const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current+1, windowLog);
+ const U32 repIndex = (U32)(current+1 - offset_1);
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
const BYTE* const repMatch = repBase + repIndex;
- if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
+ if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
/* repcode detected we should take it */
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -974,10 +1002,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
current++;
/* check repCode */
if (offset) {
+ const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
const U32 repIndex = (U32)(current - offset_1);
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
const BYTE* const repMatch = repBase + repIndex;
- if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
+ if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
if (MEM_read32(ip) == MEM_read32(repMatch)) {
/* repcode detected */
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -1004,10 +1033,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
current++;
/* check repCode */
if (offset) {
+ const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
const U32 repIndex = (U32)(current - offset_1);
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
const BYTE* const repMatch = repBase + repIndex;
- if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
+ if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
if (MEM_read32(ip) == MEM_read32(repMatch)) {
/* repcode detected */
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -1042,22 +1072,24 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
/* store sequence */
_storeSequence:
{ size_t const litLength = start - anchor;
- ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
anchor = ip = start + matchLength;
}
/* check immediate repcode */
while (ip <= ilimit) {
- const U32 repIndex = (U32)((ip-base) - offset_2);
+ const U32 repCurrent = (U32)(ip-base);
+ const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog);
+ const U32 repIndex = repCurrent - offset_2;
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
const BYTE* const repMatch = repBase + repIndex;
- if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
+ if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
if (MEM_read32(ip) == MEM_read32(repMatch)) {
/* repcode detected we should take it */
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */
- ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
ip += matchLength;
anchor = ip;
continue; /* faster when present ... (?) */
@@ -1070,7 +1102,7 @@ _storeSequence:
rep[1] = offset_2;
/* Return the last literals size */
- return iend - anchor;
+ return (size_t)(iend - anchor);
}
@@ -1078,7 +1110,7 @@ size_t ZSTD_compressBlock_greedy_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 0, 0);
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0);
}
size_t ZSTD_compressBlock_lazy_extDict(
@@ -1086,7 +1118,7 @@ size_t ZSTD_compressBlock_lazy_extDict(
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 0, 1);
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
}
size_t ZSTD_compressBlock_lazy2_extDict(
@@ -1094,7 +1126,7 @@ size_t ZSTD_compressBlock_lazy2_extDict(
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 0, 2);
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
}
size_t ZSTD_compressBlock_btlazy2_extDict(
@@ -1102,5 +1134,5 @@ size_t ZSTD_compressBlock_btlazy2_extDict(
void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 1, 2);
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
}
diff --git a/zstd/lib/compress/zstd_lazy.h b/zstd/lib/compress/zstd_lazy.h
index bb17630..581936f 100644
--- a/zstd/lib/compress/zstd_lazy.h
+++ b/zstd/lib/compress/zstd_lazy.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
diff --git a/zstd/lib/compress/zstd_ldm.c b/zstd/lib/compress/zstd_ldm.c
index 784d20f..8c47948 100644
--- a/zstd/lib/compress/zstd_ldm.c
+++ b/zstd/lib/compress/zstd_ldm.c
@@ -1,15 +1,16 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
*/
#include "zstd_ldm.h"
-#include "debug.h"
+#include "../common/debug.h"
#include "zstd_fast.h" /* ZSTD_fillHashTable() */
#include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */
@@ -49,9 +50,9 @@ size_t ZSTD_ldm_getTableSize(ldmParams_t params)
{
size_t const ldmHSize = ((size_t)1) << params.hashLog;
size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
- size_t const ldmBucketSize =
- ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
- size_t const totalSize = ldmBucketSize + ldmHSize * sizeof(ldmEntry_t);
+ size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
+ size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
+ + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
return params.enableLdm ? totalSize : 0;
}
@@ -223,6 +224,20 @@ static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
return rollingHash;
}
+void ZSTD_ldm_fillHashTable(
+ ldmState_t* state, const BYTE* ip,
+ const BYTE* iend, ldmParams_t const* params)
+{
+ DEBUGLOG(5, "ZSTD_ldm_fillHashTable");
+ if ((size_t)(iend - ip) >= params->minMatchLength) {
+ U64 startingHash = ZSTD_rollingHash_compute(ip, params->minMatchLength);
+ ZSTD_ldm_fillLdmHashTable(
+ state, startingHash, ip, iend - params->minMatchLength, state->window.base,
+ params->hashLog - params->bucketSizeLog,
+ *params);
+ }
+}
+
/** ZSTD_ldm_limitTableUpdate() :
*
@@ -447,8 +462,10 @@ size_t ZSTD_ldm_generateSequences(
if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
U32 const ldmHSize = 1U << params->hashLog;
U32 const correction = ZSTD_window_correctOverflow(
- &ldmState->window, /* cycleLog */ 0, maxDist, src);
+ &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
+ /* invalidate dictionaries on overflow correction */
+ ldmState->loadedDictEnd = 0;
}
/* 2. We enforce the maximum offset allowed.
*
@@ -457,8 +474,14 @@ size_t ZSTD_ldm_generateSequences(
* TODO: * Test the chunk size.
* * Try invalidation after the sequence generation and test the
* the offset against maxDist directly.
+ *
+ * NOTE: Because of dictionaries + sequence splitting we MUST make sure
+ * that any offset used is valid at the END of the sequence, since it may
+ * be split into two sequences. This condition holds when using
+ * ZSTD_window_enforceMaxDist(), but if we move to checking offsets
+ * against maxDist directly, we'll have to carefully handle that case.
*/
- ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL, NULL);
+ ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL);
/* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
newLeftoverSize = ZSTD_ldm_generateSequences_internal(
ldmState, sequences, params, chunkStart, chunkSize);
@@ -566,14 +589,13 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
if (sequence.offset == 0)
break;
- assert(sequence.offset <= (1U << cParams->windowLog));
assert(ip + sequence.litLength + sequence.matchLength <= iend);
/* Fill tables for block compressor */
ZSTD_ldm_limitTableUpdate(ms, ip);
ZSTD_ldm_fillFastTables(ms, ip);
/* Run the block compressor */
- DEBUGLOG(5, "calling block compressor on segment of size %u", sequence.litLength);
+ DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength);
{
size_t const newLitLength =
blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
@@ -583,7 +605,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
rep[i] = rep[i-1];
rep[0] = sequence.offset;
/* Store the sequence */
- ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength,
+ ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
sequence.offset + ZSTD_REP_MOVE,
sequence.matchLength - MINMATCH);
ip += sequence.matchLength;
diff --git a/zstd/lib/compress/zstd_ldm.h b/zstd/lib/compress/zstd_ldm.h
index a478461..229ea05 100644
--- a/zstd/lib/compress/zstd_ldm.h
+++ b/zstd/lib/compress/zstd_ldm.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
*/
#ifndef ZSTD_LDM_H
@@ -15,7 +16,7 @@ extern "C" {
#endif
#include "zstd_compress_internal.h" /* ldmParams_t, U32 */
-#include "zstd.h" /* ZSTD_CCtx, size_t */
+#include "../zstd.h" /* ZSTD_CCtx, size_t */
/*-*************************************
* Long distance matching
@@ -23,6 +24,10 @@ extern "C" {
#define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_LIMIT_DEFAULT
+void ZSTD_ldm_fillHashTable(
+ ldmState_t* state, const BYTE* ip,
+ const BYTE* iend, ldmParams_t const* params);
+
/**
* ZSTD_ldm_generateSequences():
*
diff --git a/zstd/lib/compress/zstd_opt.c b/zstd/lib/compress/zstd_opt.c
index efb69d3..36fff05 100644
--- a/zstd/lib/compress/zstd_opt.c
+++ b/zstd/lib/compress/zstd_opt.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -249,40 +249,6 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
}
}
-/* ZSTD_litLengthContribution() :
- * @return ( cost(litlength) - cost(0) )
- * this value can then be added to rawLiteralsCost()
- * to provide a cost which is directly comparable to a match ending at same position */
-static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr, int optLevel)
-{
- if (optPtr->priceType >= zop_predef) return WEIGHT(litLength, optLevel);
-
- /* dynamic statistics */
- { U32 const llCode = ZSTD_LLcode(litLength);
- int const contribution = (LL_bits[llCode] * BITCOST_MULTIPLIER)
- + WEIGHT(optPtr->litLengthFreq[0], optLevel) /* note: log2litLengthSum cancel out */
- - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
-#if 1
- return contribution;
-#else
- return MAX(0, contribution); /* sometimes better, sometimes not ... */
-#endif
- }
-}
-
-/* ZSTD_literalsContribution() :
- * creates a fake cost for the literals part of a sequence
- * which can be compared to the ending cost of a match
- * should a new match start at this position */
-static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLength,
- const optState_t* const optPtr,
- int optLevel)
-{
- int const contribution = ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel)
- + ZSTD_litLengthContribution(litLength, optPtr, optLevel);
- return contribution;
-}
-
/* ZSTD_getMatchPrice() :
* Provides the cost of the match part (offset + matchLength) of a sequence
* Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
@@ -372,13 +338,15 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
/* Update hashTable3 up to ip (excluded)
Assumption : always within prefix (i.e. not within extDict) */
-static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, const BYTE* const ip)
+static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
+ U32* nextToUpdate3,
+ const BYTE* const ip)
{
U32* const hashTable3 = ms->hashTable3;
U32 const hashLog3 = ms->hashLog3;
const BYTE* const base = ms->window.base;
- U32 idx = ms->nextToUpdate3;
- U32 const target = ms->nextToUpdate3 = (U32)(ip - base);
+ U32 idx = *nextToUpdate3;
+ U32 const target = (U32)(ip - base);
size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
assert(hashLog3 > 0);
@@ -387,6 +355,7 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, const BYTE*
idx++;
}
+ *nextToUpdate3 = target;
return hashTable3[hash3];
}
@@ -503,9 +472,11 @@ static U32 ZSTD_insertBt1(
} }
*smallerPtr = *largerPtr = 0;
- if (bestLength > 384) return MIN(192, (U32)(bestLength - 384)); /* speed optimization */
- assert(matchEndIdx > current + 8);
- return matchEndIdx - (current + 8);
+ { U32 positions = 0;
+ if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384)); /* speed optimization */
+ assert(matchEndIdx > current + 8);
+ return MAX(positions, matchEndIdx - (current + 8));
+ }
}
FORCE_INLINE_TEMPLATE
@@ -520,8 +491,13 @@ void ZSTD_updateTree_internal(
DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
idx, target, dictMode);
- while(idx < target)
- idx += ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
+ while(idx < target) {
+ U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
+ assert(idx < (U32)(idx + forward));
+ idx += forward;
+ }
+ assert((size_t)(ip - base) <= (size_t)(U32)(-1));
+ assert((size_t)(iend - base) <= (size_t)(U32)(-1));
ms->nextToUpdate = target;
}
@@ -531,11 +507,12 @@ void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
FORCE_INLINE_TEMPLATE
U32 ZSTD_insertBtAndGetAllMatches (
+ ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */
ZSTD_matchState_t* ms,
+ U32* nextToUpdate3,
const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
- U32 rep[ZSTD_REP_NUM],
+ const U32 rep[ZSTD_REP_NUM],
U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
- ZSTD_match_t* matches,
const U32 lengthToBeat,
U32 const mls /* template */)
{
@@ -556,8 +533,8 @@ U32 ZSTD_insertBtAndGetAllMatches (
U32 const dictLimit = ms->window.dictLimit;
const BYTE* const dictEnd = dictBase + dictLimit;
const BYTE* const prefixStart = base + dictLimit;
- U32 const btLow = btMask >= current ? 0 : current - btMask;
- U32 const windowLow = ms->window.lowLimit;
+ U32 const btLow = (btMask >= current) ? 0 : current - btMask;
+ U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog);
U32 const matchLow = windowLow ? windowLow : 1;
U32* smallerPtr = bt + 2*(current&btMask);
U32* largerPtr = bt + 2*(current&btMask) + 1;
@@ -592,7 +569,10 @@ U32 ZSTD_insertBtAndGetAllMatches (
U32 repLen = 0;
assert(current >= dictLimit);
if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < current-dictLimit) { /* equivalent to `current > repIndex >= dictLimit` */
- if (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch)) {
+ /* We must validate the repcode offset because when we're using a dictionary the
+ * valid offset range shrinks when the dictionary goes out of bounds.
+ */
+ if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
}
} else { /* repIndex < dictLimit || repIndex >= current */
@@ -627,7 +607,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
/* HC3 match finder */
if ((mls == 3) /*static*/ && (bestLength < mls)) {
- U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, ip);
+ U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
if ((matchIndex3 >= matchLow)
& (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
size_t mlen;
@@ -653,9 +633,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
(ip+mlen == iLimit) ) { /* best possible length */
ms->nextToUpdate = current+1; /* skip insertion */
return 1;
- }
- }
- }
+ } } }
/* no dictMatchState lookup: dicts don't have a populated HC3 table */
}
@@ -663,19 +641,21 @@ U32 ZSTD_insertBtAndGetAllMatches (
while (nbCompares-- && (matchIndex >= matchLow)) {
U32* const nextPtr = bt + 2*(matchIndex & btMask);
- size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
const BYTE* match;
+ size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
assert(current > matchIndex);
if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */
match = base + matchIndex;
+ if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */
matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
} else {
match = dictBase + matchIndex;
+ assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */
matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
if (matchIndex+matchLength >= dictLimit)
- match = base + matchIndex; /* prepare for match[matchLength] */
+ match = base + matchIndex; /* prepare for match[matchLength] read */
}
if (matchLength > bestLength) {
@@ -760,10 +740,13 @@ U32 ZSTD_insertBtAndGetAllMatches (
FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
+ ZSTD_match_t* matches, /* store result (match found, increasing size) in this table */
ZSTD_matchState_t* ms,
+ U32* nextToUpdate3,
const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode,
- U32 rep[ZSTD_REP_NUM], U32 const ll0,
- ZSTD_match_t* matches, U32 const lengthToBeat)
+ const U32 rep[ZSTD_REP_NUM],
+ U32 const ll0,
+ U32 const lengthToBeat)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32 const matchLengthSearch = cParams->minMatch;
@@ -772,12 +755,12 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode);
switch(matchLengthSearch)
{
- case 3 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 3);
+ case 3 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3);
default :
- case 4 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 4);
- case 5 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 5);
+ case 4 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4);
+ case 5 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5);
case 7 :
- case 6 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 6);
+ case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6);
}
}
@@ -785,30 +768,6 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
/*-*******************************
* Optimal parser
*********************************/
-typedef struct repcodes_s {
- U32 rep[3];
-} repcodes_t;
-
-static repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
-{
- repcodes_t newReps;
- if (offset >= ZSTD_REP_NUM) { /* full offset */
- newReps.rep[2] = rep[1];
- newReps.rep[1] = rep[0];
- newReps.rep[0] = offset - ZSTD_REP_MOVE;
- } else { /* repcode */
- U32 const repCode = offset + ll0;
- if (repCode > 0) { /* note : if repCode==0, no change */
- U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
- newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
- newReps.rep[1] = rep[0];
- newReps.rep[0] = currentOffset;
- } else { /* repCode == 0 */
- memcpy(&newReps, rep, sizeof(newReps));
- }
- }
- return newReps;
-}
static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
@@ -825,7 +784,7 @@ listStats(const U32* table, int lastEltID)
int enb;
for (enb=0; enb < nbElts; enb++) {
(void)table;
- //RAWLOG(2, "%3i:%3i, ", enb, table[enb]);
+ /* RAWLOG(2, "%3i:%3i, ", enb, table[enb]); */
RAWLOG(2, "%4i,", table[enb]);
}
RAWLOG(2, " \n");
@@ -853,6 +812,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
+ U32 nextToUpdate3 = ms->nextToUpdate;
ZSTD_optimal_t* const opt = optStatePtr->priceTable;
ZSTD_match_t* const matches = optStatePtr->matchTable;
@@ -862,7 +822,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
(U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
assert(optLevel <= 2);
- ms->nextToUpdate3 = ms->nextToUpdate;
ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
ip += (ip==prefixStart);
@@ -873,14 +832,19 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
/* find first match */
{ U32 const litlen = (U32)(ip - anchor);
U32 const ll0 = !litlen;
- U32 const nbMatches = ZSTD_BtGetAllMatches(ms, ip, iend, dictMode, rep, ll0, matches, minMatch);
+ U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
if (!nbMatches) { ip++; continue; }
/* initialize opt[0] */
{ U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
opt[0].mlen = 0; /* means is_a_literal */
opt[0].litlen = litlen;
- opt[0].price = ZSTD_literalsContribution(anchor, litlen, optStatePtr, optLevel);
+ /* We don't need to include the actual price of the literals because
+ * it is static for the duration of the forward pass, and is included
+ * in every price. We include the literal length to avoid negative
+ * prices when we subtract the previous literal length.
+ */
+ opt[0].price = ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
/* large match -> immediate encoding */
{ U32 const maxML = matches[nbMatches-1].len;
@@ -909,7 +873,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
for (matchNb = 0; matchNb < nbMatches; matchNb++) {
U32 const offset = matches[matchNb].off;
U32 const end = matches[matchNb].len;
- repcodes_t const repHistory = ZSTD_updateRep(rep, offset, ll0);
for ( ; pos <= end ; pos++ ) {
U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
U32 const sequencePrice = literalsPrice + matchPrice;
@@ -919,8 +882,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
opt[pos].off = offset;
opt[pos].litlen = litlen;
opt[pos].price = sequencePrice;
- ZSTD_STATIC_ASSERT(sizeof(opt[pos].rep) == sizeof(repHistory));
- memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
} }
last_pos = pos-1;
}
@@ -947,7 +908,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
opt[cur].off = 0;
opt[cur].litlen = litlen;
opt[cur].price = price;
- memcpy(opt[cur].rep, opt[cur-1].rep, sizeof(opt[cur].rep));
} else {
DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
@@ -955,6 +915,21 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
}
}
+ /* Set the repcodes of the current position. We must do it here
+ * because we rely on the repcodes of the 2nd to last sequence being
+ * correct to set the next chunks repcodes during the backward
+ * traversal.
+ */
+ ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
+ assert(cur >= opt[cur].mlen);
+ if (opt[cur].mlen != 0) {
+ U32 const prev = cur - opt[cur].mlen;
+ repcodes_t newReps = ZSTD_updateRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
+ memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
+ } else {
+ memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
+ }
+
/* last match must start at a minimum distance of 8 from oend */
if (inr > ilimit) continue;
@@ -970,7 +945,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
U32 const previousPrice = opt[cur].price;
U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
- U32 const nbMatches = ZSTD_BtGetAllMatches(ms, inr, iend, dictMode, opt[cur].rep, ll0, matches, minMatch);
+ U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
U32 matchNb;
if (!nbMatches) {
DEBUGLOG(7, "rPos:%u : no match found", cur);
@@ -995,7 +970,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
/* set prices using matches found at position == cur */
for (matchNb = 0; matchNb < nbMatches; matchNb++) {
U32 const offset = matches[matchNb].off;
- repcodes_t const repHistory = ZSTD_updateRep(opt[cur].rep, offset, ll0);
U32 const lastML = matches[matchNb].len;
U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
U32 mlen;
@@ -1015,8 +989,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
opt[pos].off = offset;
opt[pos].litlen = litlen;
opt[pos].price = price;
- ZSTD_STATIC_ASSERT(sizeof(opt[pos].rep) == sizeof(repHistory));
- memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
} else {
DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
@@ -1032,6 +1004,17 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
_shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
assert(opt[0].mlen == 0);
+ /* Set the next chunk's repcodes based on the repcodes of the beginning
+ * of the last match, and the last sequence. This avoids us having to
+ * update them while traversing the sequences.
+ */
+ if (lastSequence.mlen != 0) {
+ repcodes_t reps = ZSTD_updateRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
+ memcpy(rep, &reps, sizeof(reps));
+ } else {
+ memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
+ }
+
{ U32 const storeEnd = cur + 1;
U32 storeStart = storeEnd;
U32 seqPos = cur;
@@ -1068,33 +1051,18 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
continue; /* will finish */
}
- /* repcodes update : like ZSTD_updateRep(), but update in place */
- if (offCode >= ZSTD_REP_NUM) { /* full offset */
- rep[2] = rep[1];
- rep[1] = rep[0];
- rep[0] = offCode - ZSTD_REP_MOVE;
- } else { /* repcode */
- U32 const repCode = offCode + (llen==0);
- if (repCode) { /* note : if repCode==0, no change */
- U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
- if (repCode >= 2) rep[2] = rep[1];
- rep[1] = rep[0];
- rep[0] = currentOffset;
- } }
-
assert(anchor + llen <= iend);
ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
- ZSTD_storeSeq(seqStore, llen, anchor, offCode, mlen-MINMATCH);
+ ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH);
anchor += advance;
ip = anchor;
} }
ZSTD_setBasePrices(optStatePtr, optLevel);
}
-
} /* while (ip < ilimit) */
/* Return the last literals size */
- return iend - anchor;
+ return (size_t)(iend - anchor);
}
@@ -1158,7 +1126,6 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
ms->window.dictLimit += (U32)srcSize;
ms->window.lowLimit = ms->window.dictLimit;
ms->nextToUpdate = ms->window.dictLimit;
- ms->nextToUpdate3 = ms->window.dictLimit;
/* re-inforce weight of collected statistics */
ZSTD_upscaleStats(&ms->opt);
diff --git a/zstd/lib/compress/zstd_opt.h b/zstd/lib/compress/zstd_opt.h
index 094f747..9aba8a9 100644
--- a/zstd/lib/compress/zstd_opt.h
+++ b/zstd/lib/compress/zstd_opt.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
diff --git a/zstd/lib/compress/zstdmt_compress.c b/zstd/lib/compress/zstdmt_compress.c
index 38fbb90..1e3c8fd 100644
--- a/zstd/lib/compress/zstdmt_compress.c
+++ b/zstd/lib/compress/zstdmt_compress.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -22,9 +22,9 @@
/* ====== Dependencies ====== */
#include <string.h> /* memcpy, memset */
#include <limits.h> /* INT_MAX, UINT_MAX */
-#include "mem.h" /* MEM_STATIC */
-#include "pool.h" /* threadpool */
-#include "threading.h" /* mutex */
+#include "../common/mem.h" /* MEM_STATIC */
+#include "../common/pool.h" /* threadpool */
+#include "../common/threading.h" /* mutex */
#include "zstd_compress_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
#include "zstd_ldm.h"
#include "zstdmt_compress.h"
@@ -461,7 +461,13 @@ typedef struct {
ZSTD_window_t ldmWindow; /* A thread-safe copy of ldmState.window */
} serialState_t;
-static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool* seqPool, ZSTD_CCtx_params params, size_t jobSize)
+static int
+ZSTDMT_serialState_reset(serialState_t* serialState,
+ ZSTDMT_seqPool* seqPool,
+ ZSTD_CCtx_params params,
+ size_t jobSize,
+ const void* dict, size_t const dictSize,
+ ZSTD_dictContentType_e dictContentType)
{
/* Adjust parameters */
if (params.ldmParams.enableLdm) {
@@ -490,8 +496,7 @@ static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool*
/* Size the seq pool tables */
ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize));
/* Reset the window */
- ZSTD_window_clear(&serialState->ldmState.window);
- serialState->ldmWindow = serialState->ldmState.window;
+ ZSTD_window_init(&serialState->ldmState.window);
/* Resize tables and output space if necessary. */
if (serialState->ldmState.hashTable == NULL || serialState->params.ldmParams.hashLog < hashLog) {
ZSTD_free(serialState->ldmState.hashTable, cMem);
@@ -506,7 +511,24 @@ static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool*
/* Zero the tables */
memset(serialState->ldmState.hashTable, 0, hashSize);
memset(serialState->ldmState.bucketOffsets, 0, bucketSize);
+
+ /* Update window state and fill hash table with dict */
+ serialState->ldmState.loadedDictEnd = 0;
+ if (dictSize > 0) {
+ if (dictContentType == ZSTD_dct_rawContent) {
+ BYTE const* const dictEnd = (const BYTE*)dict + dictSize;
+ ZSTD_window_update(&serialState->ldmState.window, dict, dictSize);
+ ZSTD_ldm_fillHashTable(&serialState->ldmState, (const BYTE*)dict, dictEnd, &params.ldmParams);
+ serialState->ldmState.loadedDictEnd = params.forceWindow ? 0 : (U32)(dictEnd - serialState->ldmState.window.base);
+ } else {
+ /* don't even load anything */
+ }
+ }
+
+ /* Initialize serialState's copy of ldmWindow. */
+ serialState->ldmWindow = serialState->ldmState.window;
}
+
serialState->params = params;
serialState->params.jobSize = (U32)jobSize;
return 0;
@@ -668,7 +690,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
/* init */
if (job->cdict) {
- size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, jobParams, job->fullFrameSize);
+ size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, &jobParams, job->fullFrameSize);
assert(job->firstJob); /* only allowed for first job */
if (ZSTD_isError(initError)) JOB_ERROR(initError);
} else { /* srcStart points at reloaded section */
@@ -680,7 +702,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
ZSTD_dtlm_fast,
NULL, /*cdict*/
- jobParams, pledgedSrcSize);
+ &jobParams, pledgedSrcSize);
if (ZSTD_isError(initError)) JOB_ERROR(initError);
} }
@@ -927,12 +949,18 @@ static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
unsigned jobID;
DEBUGLOG(3, "ZSTDMT_releaseAllJobResources");
for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) {
+ /* Copy the mutex/cond out */
+ ZSTD_pthread_mutex_t const mutex = mtctx->jobs[jobID].job_mutex;
+ ZSTD_pthread_cond_t const cond = mtctx->jobs[jobID].job_cond;
+
DEBUGLOG(4, "job%02u: release dst address %08X", jobID, (U32)(size_t)mtctx->jobs[jobID].dstBuff.start);
ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff);
- mtctx->jobs[jobID].dstBuff = g_nullBuffer;
- mtctx->jobs[jobID].cSize = 0;
+
+ /* Clear the job description, but keep the mutex/cond */
+ memset(&mtctx->jobs[jobID], 0, sizeof(mtctx->jobs[jobID]));
+ mtctx->jobs[jobID].job_mutex = mutex;
+ mtctx->jobs[jobID].job_cond = cond;
}
- memset(mtctx->jobs, 0, (mtctx->jobIDMask+1)*sizeof(ZSTDMT_jobDescription));
mtctx->inBuff.buffer = g_nullBuffer;
mtctx->inBuff.filled = 0;
mtctx->allJobsCompleted = 1;
@@ -1028,9 +1056,9 @@ size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter,
/* Sets parameters relevant to the compression job,
* initializing others to default values. */
-static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params)
+static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(const ZSTD_CCtx_params* params)
{
- ZSTD_CCtx_params jobParams = params;
+ ZSTD_CCtx_params jobParams = *params;
/* Clear parameters related to multithreading */
jobParams.forceWindow = 0;
jobParams.nbWorkers = 0;
@@ -1048,7 +1076,7 @@ static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params)
static size_t ZSTDMT_resize(ZSTDMT_CCtx* mtctx, unsigned nbWorkers)
{
if (POOL_resize(mtctx->factory, nbWorkers)) return ERROR(memory_allocation);
- FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbWorkers) );
+ FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbWorkers) , "");
mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, nbWorkers);
if (mtctx->bufPool == NULL) return ERROR(memory_allocation);
mtctx->cctxPool = ZSTDMT_expandCCtxPool(mtctx->cctxPool, nbWorkers);
@@ -1070,7 +1098,7 @@ void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_p
DEBUGLOG(5, "ZSTDMT_updateCParams_whileCompressing (level:%i)",
compressionLevel);
mtctx->params.compressionLevel = compressionLevel;
- { ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, 0, 0);
+ { ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, 0);
cParams.windowLog = saved_wlog;
mtctx->params.cParams = cParams;
}
@@ -1129,9 +1157,14 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
size_t const produced = ZSTD_isError(cResult) ? 0 : cResult;
size_t const flushed = ZSTD_isError(cResult) ? 0 : jobPtr->dstFlushed;
assert(flushed <= produced);
+ assert(jobPtr->consumed <= jobPtr->src.size);
toFlush = produced - flushed;
- if (toFlush==0 && (jobPtr->consumed >= jobPtr->src.size)) {
- /* doneJobID is not-fully-flushed, but toFlush==0 : doneJobID should be compressing some more data */
+ /* if toFlush==0, nothing is available to flush.
+ * However, jobID is expected to still be active:
+ * if jobID was already completed and fully flushed,
+ * ZSTDMT_flushProduced() should have already moved onto next job.
+ * Therefore, some input has not yet been consumed. */
+ if (toFlush==0) {
assert(jobPtr->consumed < jobPtr->src.size);
}
}
@@ -1146,14 +1179,18 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
/* ===== Multi-threaded compression ===== */
/* ------------------------------------------ */
-static unsigned ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params)
+static unsigned ZSTDMT_computeTargetJobLog(const ZSTD_CCtx_params* params)
{
- if (params.ldmParams.enableLdm)
+ unsigned jobLog;
+ if (params->ldmParams.enableLdm) {
/* In Long Range Mode, the windowLog is typically oversized.
* In which case, it's preferable to determine the jobSize
* based on chainLog instead. */
- return MAX(21, params.cParams.chainLog + 4);
- return MAX(20, params.cParams.windowLog + 2);
+ jobLog = MAX(21, params->cParams.chainLog + 4);
+ } else {
+ jobLog = MAX(20, params->cParams.windowLog + 2);
+ }
+ return MIN(jobLog, (unsigned)ZSTDMT_JOBLOG_MAX);
}
static int ZSTDMT_overlapLog_default(ZSTD_strategy strat)
@@ -1184,27 +1221,27 @@ static int ZSTDMT_overlapLog(int ovlog, ZSTD_strategy strat)
return ovlog;
}
-static size_t ZSTDMT_computeOverlapSize(ZSTD_CCtx_params const params)
+static size_t ZSTDMT_computeOverlapSize(const ZSTD_CCtx_params* params)
{
- int const overlapRLog = 9 - ZSTDMT_overlapLog(params.overlapLog, params.cParams.strategy);
- int ovLog = (overlapRLog >= 8) ? 0 : (params.cParams.windowLog - overlapRLog);
+ int const overlapRLog = 9 - ZSTDMT_overlapLog(params->overlapLog, params->cParams.strategy);
+ int ovLog = (overlapRLog >= 8) ? 0 : (params->cParams.windowLog - overlapRLog);
assert(0 <= overlapRLog && overlapRLog <= 8);
- if (params.ldmParams.enableLdm) {
+ if (params->ldmParams.enableLdm) {
/* In Long Range Mode, the windowLog is typically oversized.
* In which case, it's preferable to determine the jobSize
* based on chainLog instead.
* Then, ovLog becomes a fraction of the jobSize, rather than windowSize */
- ovLog = MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
+ ovLog = MIN(params->cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
- overlapRLog;
}
- assert(0 <= ovLog && ovLog <= 30);
- DEBUGLOG(4, "overlapLog : %i", params.overlapLog);
+ assert(0 <= ovLog && ovLog <= ZSTD_WINDOWLOG_MAX);
+ DEBUGLOG(4, "overlapLog : %i", params->overlapLog);
DEBUGLOG(4, "overlap size : %i", 1 << ovLog);
return (ovLog==0) ? 0 : (size_t)1 << ovLog;
}
static unsigned
-ZSTDMT_computeNbJobs(ZSTD_CCtx_params params, size_t srcSize, unsigned nbWorkers)
+ZSTDMT_computeNbJobs(const ZSTD_CCtx_params* params, size_t srcSize, unsigned nbWorkers)
{
assert(nbWorkers>0);
{ size_t const jobSizeTarget = (size_t)1 << ZSTDMT_computeTargetJobLog(params);
@@ -1220,16 +1257,17 @@ ZSTDMT_computeNbJobs(ZSTD_CCtx_params params, size_t srcSize, unsigned nbWorkers
/* ZSTDMT_compress_advanced_internal() :
* This is a blocking function : it will only give back control to caller after finishing its compression job.
*/
-static size_t ZSTDMT_compress_advanced_internal(
+static size_t
+ZSTDMT_compress_advanced_internal(
ZSTDMT_CCtx* mtctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
const ZSTD_CDict* cdict,
ZSTD_CCtx_params params)
{
- ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(params);
- size_t const overlapSize = ZSTDMT_computeOverlapSize(params);
- unsigned const nbJobs = ZSTDMT_computeNbJobs(params, srcSize, params.nbWorkers);
+ ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(&params);
+ size_t const overlapSize = ZSTDMT_computeOverlapSize(&params);
+ unsigned const nbJobs = ZSTDMT_computeNbJobs(&params, srcSize, params.nbWorkers);
size_t const proposedJobSize = (srcSize + (nbJobs-1)) / nbJobs;
size_t const avgJobSize = (((proposedJobSize-1) & 0x1FFFF) < 0x7FFF) ? proposedJobSize + 0xFFFF : proposedJobSize; /* avoid too small last block */
const char* const srcStart = (const char*)src;
@@ -1247,15 +1285,16 @@ static size_t ZSTDMT_compress_advanced_internal(
ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0];
DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: fallback to single-thread mode");
if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, jobParams.fParams);
- return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, jobParams);
+ return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, &jobParams);
}
assert(avgJobSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */
ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(avgJobSize) );
- if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, avgJobSize))
+ /* LDM doesn't even try to load the dictionary in single-ingestion mode */
+ if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, avgJobSize, NULL, 0, ZSTD_dct_auto))
return ERROR(memory_allocation);
- FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbJobs) ); /* only expands if necessary */
+ FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbJobs) , ""); /* only expands if necessary */
{ unsigned u;
for (u=0; u<nbJobs; u++) {
@@ -1388,19 +1427,19 @@ size_t ZSTDMT_initCStream_internal(
/* init */
if (params.nbWorkers != mtctx->params.nbWorkers)
- FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) );
+ FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) , "");
if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
- if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX;
+ if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = (size_t)ZSTDMT_JOBSIZE_MAX;
mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */
if (mtctx->singleBlockingThread) {
- ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(params);
+ ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(&params);
DEBUGLOG(5, "ZSTDMT_initCStream_internal: switch to single blocking thread mode");
assert(singleThreadParams.nbWorkers == 0);
return ZSTD_initCStream_internal(mtctx->cctxPool->cctx[0],
dict, dictSize, cdict,
- singleThreadParams, pledgedSrcSize);
+ &singleThreadParams, pledgedSrcSize);
}
DEBUGLOG(4, "ZSTDMT_initCStream_internal: %u workers", params.nbWorkers);
@@ -1426,12 +1465,14 @@ size_t ZSTDMT_initCStream_internal(
mtctx->cdict = cdict;
}
- mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(params);
+ mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(&params);
DEBUGLOG(4, "overlapLog=%i => %u KB", params.overlapLog, (U32)(mtctx->targetPrefixSize>>10));
mtctx->targetSectionSize = params.jobSize;
if (mtctx->targetSectionSize == 0) {
- mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params);
+ mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(&params);
}
+ assert(mtctx->targetSectionSize <= (size_t)ZSTDMT_JOBSIZE_MAX);
+
if (params.rsyncable) {
/* Aim for the targetsectionSize as the average job size. */
U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20);
@@ -1483,7 +1524,8 @@ size_t ZSTDMT_initCStream_internal(
mtctx->allJobsCompleted = 0;
mtctx->consumed = 0;
mtctx->produced = 0;
- if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, mtctx->targetSectionSize))
+ if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, mtctx->targetSectionSize,
+ dict, dictSize, dictContentType))
return ERROR(memory_allocation);
return 0;
}
@@ -1697,9 +1739,11 @@ static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, u
assert(mtctx->doneJobID < mtctx->nextJobID);
assert(cSize >= mtctx->jobs[wJobID].dstFlushed);
assert(mtctx->jobs[wJobID].dstBuff.start != NULL);
- memcpy((char*)output->dst + output->pos,
- (const char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].dstFlushed,
- toFlush);
+ if (toFlush > 0) {
+ memcpy((char*)output->dst + output->pos,
+ (const char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].dstFlushed,
+ toFlush);
+ }
output->pos += toFlush;
mtctx->jobs[wJobID].dstFlushed += toFlush; /* can write : this value is only used by mtctx */
@@ -1769,7 +1813,7 @@ static int ZSTDMT_isOverlapped(buffer_t buffer, range_t range)
BYTE const* const bufferStart = (BYTE const*)buffer.start;
BYTE const* const bufferEnd = bufferStart + buffer.capacity;
BYTE const* const rangeStart = (BYTE const*)range.start;
- BYTE const* const rangeEnd = rangeStart + range.size;
+ BYTE const* const rangeEnd = range.size != 0 ? rangeStart + range.size : rangeStart;
if (rangeStart == NULL || bufferStart == NULL)
return 0;
@@ -2043,7 +2087,7 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
|| ((endOp == ZSTD_e_end) && (!mtctx->frameEnded)) ) { /* must finish the frame with a zero-size block */
size_t const jobSize = mtctx->inBuff.filled;
assert(mtctx->inBuff.filled <= mtctx->targetSectionSize);
- FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp) );
+ FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp) , "");
}
/* check for potential compressed data ready to be flushed */
@@ -2057,7 +2101,7 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
{
- FORWARD_IF_ERROR( ZSTDMT_compressStream_generic(mtctx, output, input, ZSTD_e_continue) );
+ FORWARD_IF_ERROR( ZSTDMT_compressStream_generic(mtctx, output, input, ZSTD_e_continue) , "");
/* recommended next input size : fill current input buffer */
return mtctx->targetSectionSize - mtctx->inBuff.filled; /* note : could be zero when input buffer is fully filled and no more availability to create new job */
@@ -2074,7 +2118,7 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* ou
|| ((endFrame==ZSTD_e_end) && !mtctx->frameEnded)) { /* need a last 0-size block to end frame */
DEBUGLOG(5, "ZSTDMT_flushStream_internal : create a new job (%u bytes, end:%u)",
(U32)srcSize, (U32)endFrame);
- FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) );
+ FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) , "");
}
/* check if there is any data available to flush */
diff --git a/zstd/lib/compress/zstdmt_compress.h b/zstd/lib/compress/zstdmt_compress.h
index 12e6bcb..89914eb 100644
--- a/zstd/lib/compress/zstdmt_compress.h
+++ b/zstd/lib/compress/zstdmt_compress.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -40,7 +40,7 @@
/* === Dependencies === */
#include <stddef.h> /* size_t */
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters */
-#include "zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */
+#include "../zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */
/* === Constants === */
@@ -50,6 +50,7 @@
#ifndef ZSTDMT_JOBSIZE_MIN
# define ZSTDMT_JOBSIZE_MIN (1 MB)
#endif
+#define ZSTDMT_JOBLOG_MAX (MEM_32bits() ? 29 : 30)
#define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (1024 MB))
diff --git a/zstd/lib/decompress/huf_decompress.c b/zstd/lib/decompress/huf_decompress.c
index 3f8bd29..68293a1 100644
--- a/zstd/lib/decompress/huf_decompress.c
+++ b/zstd/lib/decompress/huf_decompress.c
@@ -1,47 +1,27 @@
/* ******************************************************************
- huff0 huffman decoder,
- part of Finite State Entropy library
- Copyright (C) 2013-present, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ * huff0 huffman decoder,
+ * part of Finite State Entropy library
+ * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
****************************************************************** */
/* **************************************************************
* Dependencies
****************************************************************/
#include <string.h> /* memcpy, memset */
-#include "compiler.h"
-#include "bitstream.h" /* BIT_* */
-#include "fse.h" /* to compress headers */
+#include "../common/compiler.h"
+#include "../common/bitstream.h" /* BIT_* */
+#include "../common/fse.h" /* to compress headers */
#define HUF_STATIC_LINKING_ONLY
-#include "huf.h"
-#include "error_private.h"
+#include "../common/huf.h"
+#include "../common/error_private.h"
/* **************************************************************
* Macros
@@ -61,7 +41,6 @@
* Error Management
****************************************************************/
#define HUF_isError ERR_isError
-#define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
/* **************************************************************
@@ -179,17 +158,29 @@ size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize
/* fill DTable */
{ U32 n;
- for (n=0; n<nbSymbols; n++) {
- U32 const w = huffWeight[n];
- U32 const length = (1 << w) >> 1;
- U32 u;
+ size_t const nEnd = nbSymbols;
+ for (n=0; n<nEnd; n++) {
+ size_t const w = huffWeight[n];
+ size_t const length = (1 << w) >> 1;
+ size_t const uStart = rankVal[w];
+ size_t const uEnd = uStart + length;
+ size_t u;
HUF_DEltX1 D;
- D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
- for (u = rankVal[w]; u < rankVal[w] + length; u++)
- dt[u] = D;
- rankVal[w] += length;
- } }
-
+ D.byte = (BYTE)n;
+ D.nbBits = (BYTE)(tableLog + 1 - w);
+ rankVal[w] = (U32)uEnd;
+ if (length < 4) {
+ /* Use length in the loop bound so the compiler knows it is short. */
+ for (u = 0; u < length; ++u)
+ dt[uStart + u] = D;
+ } else {
+ /* Unroll the loop 4 times, we know it is a power of 2. */
+ for (u = uStart; u < uEnd; u += 4) {
+ dt[u + 0] = D;
+ dt[u + 1] = D;
+ dt[u + 2] = D;
+ dt[u + 3] = D;
+ } } } }
return iSize;
}
@@ -280,6 +271,7 @@ HUF_decompress4X1_usingDTable_internal_body(
{ const BYTE* const istart = (const BYTE*) cSrc;
BYTE* const ostart = (BYTE*) dst;
BYTE* const oend = ostart + dstSize;
+ BYTE* const olimit = oend - 3;
const void* const dtPtr = DTable + 1;
const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
@@ -304,9 +296,9 @@ HUF_decompress4X1_usingDTable_internal_body(
BYTE* op2 = opStart2;
BYTE* op3 = opStart3;
BYTE* op4 = opStart4;
- U32 endSignal = BIT_DStream_unfinished;
DTableDesc const dtd = HUF_getDTableDesc(DTable);
U32 const dtLog = dtd.tableLog;
+ U32 endSignal = 1;
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
@@ -315,8 +307,7 @@ HUF_decompress4X1_usingDTable_internal_body(
CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
/* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
- endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
- while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) {
+ for ( ; (endSignal) & (op4 < olimit) ; ) {
HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
@@ -333,10 +324,10 @@ HUF_decompress4X1_usingDTable_internal_body(
HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
- BIT_reloadDStream(&bitD1);
- BIT_reloadDStream(&bitD2);
- BIT_reloadDStream(&bitD3);
- BIT_reloadDStream(&bitD4);
+ endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
+ endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
+ endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
+ endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
}
/* check corruption */
@@ -755,7 +746,6 @@ HUF_decompress1X2_usingDTable_internal_body(
return dstSize;
}
-
FORCE_INLINE_TEMPLATE size_t
HUF_decompress4X2_usingDTable_internal_body(
void* dst, size_t dstSize,
@@ -767,6 +757,7 @@ HUF_decompress4X2_usingDTable_internal_body(
{ const BYTE* const istart = (const BYTE*) cSrc;
BYTE* const ostart = (BYTE*) dst;
BYTE* const oend = ostart + dstSize;
+ BYTE* const olimit = oend - (sizeof(size_t)-1);
const void* const dtPtr = DTable+1;
const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
@@ -791,7 +782,7 @@ HUF_decompress4X2_usingDTable_internal_body(
BYTE* op2 = opStart2;
BYTE* op3 = opStart3;
BYTE* op4 = opStart4;
- U32 endSignal;
+ U32 endSignal = 1;
DTableDesc const dtd = HUF_getDTableDesc(DTable);
U32 const dtLog = dtd.tableLog;
@@ -802,8 +793,29 @@ HUF_decompress4X2_usingDTable_internal_body(
CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
/* 16-32 symbols per loop (4-8 symbols per stream) */
- endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
- for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
+ for ( ; (endSignal) & (op4 < olimit); ) {
+#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+ HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+ HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+ HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+ HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
+ endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
+ endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+ HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+ HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+ HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+ HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
+ endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
+ endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
+#else
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
@@ -820,8 +832,12 @@ HUF_decompress4X2_usingDTable_internal_body(
HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
-
- endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+ endSignal = (U32)LIKELY(
+ (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
+ & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
+ & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
+ & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
+#endif
}
/* check corruption */
diff --git a/zstd/lib/decompress/zstd_ddict.c b/zstd/lib/decompress/zstd_ddict.c
index 0af3d23..c8cb8ec 100644
--- a/zstd/lib/decompress/zstd_ddict.c
+++ b/zstd/lib/decompress/zstd_ddict.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -15,17 +15,17 @@
* Dependencies
*********************************************************/
#include <string.h> /* memcpy, memmove, memset */
-#include "cpu.h" /* bmi2 */
-#include "mem.h" /* low level memory routines */
+#include "../common/cpu.h" /* bmi2 */
+#include "../common/mem.h" /* low level memory routines */
#define FSE_STATIC_LINKING_ONLY
-#include "fse.h"
+#include "../common/fse.h"
#define HUF_STATIC_LINKING_ONLY
-#include "huf.h"
+#include "../common/huf.h"
#include "zstd_decompress_internal.h"
#include "zstd_ddict.h"
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
-# include "zstd_legacy.h"
+# include "../legacy/zstd_legacy.h"
#endif
@@ -65,6 +65,10 @@ void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
dctx->virtualStart = ddict->dictContent;
dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize;
dctx->previousDstEnd = dctx->dictEnd;
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ dctx->dictContentBeginForFuzzing = dctx->prefixStart;
+ dctx->dictContentEndForFuzzing = dctx->previousDstEnd;
+#endif
if (ddict->entropyPresent) {
dctx->litEntropy = 1;
dctx->fseEntropy = 1;
@@ -107,7 +111,7 @@ ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict,
/* load entropy tables */
RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy(
&ddict->entropy, ddict->dictContent, ddict->dictSize)),
- dictionary_corrupted);
+ dictionary_corrupted, "");
ddict->entropyPresent = 1;
return 0;
}
@@ -133,7 +137,7 @@ static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
/* parse dictionary content */
- FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) );
+ FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , "");
return 0;
}
diff --git a/zstd/lib/decompress/zstd_ddict.h b/zstd/lib/decompress/zstd_ddict.h
index 0479d11..af307ef 100644
--- a/zstd/lib/decompress/zstd_ddict.h
+++ b/zstd/lib/decompress/zstd_ddict.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -16,7 +16,7 @@
* Dependencies
*********************************************************/
#include <stddef.h> /* size_t */
-#include "zstd.h" /* ZSTD_DDict, and several public functions */
+#include "../zstd.h" /* ZSTD_DDict, and several public functions */
/*-*******************************************************
diff --git a/zstd/lib/decompress/zstd_decompress.c b/zstd/lib/decompress/zstd_decompress.c
index e42872a..be5c7cf 100644
--- a/zstd/lib/decompress/zstd_decompress.c
+++ b/zstd/lib/decompress/zstd_decompress.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -56,19 +56,19 @@
* Dependencies
*********************************************************/
#include <string.h> /* memcpy, memmove, memset */
-#include "cpu.h" /* bmi2 */
-#include "mem.h" /* low level memory routines */
+#include "../common/cpu.h" /* bmi2 */
+#include "../common/mem.h" /* low level memory routines */
#define FSE_STATIC_LINKING_ONLY
-#include "fse.h"
+#include "../common/fse.h"
#define HUF_STATIC_LINKING_ONLY
-#include "huf.h"
-#include "zstd_internal.h" /* blockProperties_t */
+#include "../common/huf.h"
+#include "../common/zstd_internal.h" /* blockProperties_t */
#include "zstd_decompress_internal.h" /* ZSTD_DCtx */
#include "zstd_ddict.h" /* ZSTD_DDictDictContent */
#include "zstd_decompress_block.h" /* ZSTD_decompressBlock_internal */
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
-# include "zstd_legacy.h"
+# include "../legacy/zstd_legacy.h"
#endif
@@ -88,10 +88,7 @@ size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); }
static size_t ZSTD_startingInputLength(ZSTD_format_e format)
{
- size_t const startingInputLength = (format==ZSTD_f_zstd1_magicless) ?
- ZSTD_FRAMEHEADERSIZE_PREFIX - ZSTD_FRAMEIDSIZE :
- ZSTD_FRAMEHEADERSIZE_PREFIX;
- ZSTD_STATIC_ASSERT(ZSTD_FRAMEHEADERSIZE_PREFIX >= ZSTD_FRAMEIDSIZE);
+ size_t const startingInputLength = ZSTD_FRAMEHEADERSIZE_PREFIX(format);
/* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */
assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) );
return startingInputLength;
@@ -114,7 +111,12 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
dctx->legacyContext = NULL;
dctx->previousLegacyVersion = 0;
dctx->noForwardProgress = 0;
+ dctx->oversizedDuration = 0;
dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
+ dctx->outBufferMode = ZSTD_obm_buffered;
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ dctx->dictContentEndForFuzzing = NULL;
+#endif
}
ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize)
@@ -211,7 +213,7 @@ unsigned ZSTD_isFrame(const void* buffer, size_t size)
static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format)
{
size_t const minInputSize = ZSTD_startingInputLength(format);
- RETURN_ERROR_IF(srcSize < minInputSize, srcSize_wrong);
+ RETURN_ERROR_IF(srcSize < minInputSize, srcSize_wrong, "");
{ BYTE const fhd = ((const BYTE*)src)[minInputSize-1];
U32 const dictID= fhd & 3;
@@ -259,7 +261,7 @@ size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, s
zfhPtr->frameType = ZSTD_skippableFrame;
return 0;
}
- RETURN_ERROR(prefix_unknown);
+ RETURN_ERROR(prefix_unknown, "");
}
/* ensure there is enough `srcSize` to fully read/decode frame header */
@@ -283,7 +285,7 @@ size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, s
if (!singleSegment) {
BYTE const wlByte = ip[pos++];
U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN;
- RETURN_ERROR_IF(windowLog > ZSTD_WINDOWLOG_MAX, frameParameter_windowTooLarge);
+ RETURN_ERROR_IF(windowLog > ZSTD_WINDOWLOG_MAX, frameParameter_windowTooLarge, "");
windowSize = (1ULL << windowLog);
windowSize += (windowSize >> 3) * (wlByte&7);
}
@@ -355,14 +357,14 @@ static size_t readSkippableFrameSize(void const* src, size_t srcSize)
size_t const skippableHeaderSize = ZSTD_SKIPPABLEHEADERSIZE;
U32 sizeU32;
- RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong);
+ RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong, "");
sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE);
RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32,
- frameParameter_unsupported);
+ frameParameter_unsupported, "");
{
size_t const skippableSize = skippableHeaderSize + sizeU32;
- RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong);
+ RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong, "");
return skippableSize;
}
}
@@ -376,7 +378,7 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
{
unsigned long long totalDstSize = 0;
- while (srcSize >= ZSTD_FRAMEHEADERSIZE_PREFIX) {
+ while (srcSize >= ZSTD_startingInputLength(ZSTD_f_zstd1)) {
U32 const magicNumber = MEM_readLE32(src);
if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
@@ -442,7 +444,7 @@ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t he
* harder.
*/
RETURN_ERROR_IF(dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID),
- dictionary_wrong);
+ dictionary_wrong, "");
#endif
if (dctx->fParams.checksumFlag) XXH64_reset(&dctx->xxhState, 0);
return 0;
@@ -562,21 +564,11 @@ unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize)
* Frame decoding
***************************************************************/
-
-void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
-{
- if (dst != dctx->previousDstEnd) { /* not contiguous */
- dctx->dictEnd = dctx->previousDstEnd;
- dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
- dctx->prefixStart = dst;
- dctx->previousDstEnd = dst;
- }
-}
-
/** ZSTD_insertBlock() :
- insert `src` block into `dctx` history. Useful to track uncompressed blocks. */
+ * insert `src` block into `dctx` history. Useful to track uncompressed blocks. */
size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize)
{
+ DEBUGLOG(5, "ZSTD_insertBlock: %u bytes", (unsigned)blockSize);
ZSTD_checkContinuity(dctx, blockStart);
dctx->previousDstEnd = (const char*)blockStart + blockSize;
return blockSize;
@@ -589,9 +581,9 @@ static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity,
DEBUGLOG(5, "ZSTD_copyRawBlock");
if (dst == NULL) {
if (srcSize == 0) return 0;
- RETURN_ERROR(dstBuffer_null);
+ RETURN_ERROR(dstBuffer_null, "");
}
- RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall);
+ RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall, "");
memcpy(dst, src, srcSize);
return srcSize;
}
@@ -602,9 +594,9 @@ static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity,
{
if (dst == NULL) {
if (regenSize == 0) return 0;
- RETURN_ERROR(dstBuffer_null);
+ RETURN_ERROR(dstBuffer_null, "");
}
- RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall);
+ RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall, "");
memset(dst, b, regenSize);
return regenSize;
}
@@ -620,7 +612,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
{
const BYTE* ip = (const BYTE*)(*srcPtr);
BYTE* const ostart = (BYTE* const)dst;
- BYTE* const oend = ostart + dstCapacity;
+ BYTE* const oend = dstCapacity != 0 ? ostart + dstCapacity : ostart;
BYTE* op = ostart;
size_t remainingSrcSize = *srcSizePtr;
@@ -628,15 +620,16 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
/* check */
RETURN_ERROR_IF(
- remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN+ZSTD_blockHeaderSize,
- srcSize_wrong);
+ remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN(dctx->format)+ZSTD_blockHeaderSize,
+ srcSize_wrong, "");
/* Frame Header */
- { size_t const frameHeaderSize = ZSTD_frameHeaderSize(ip, ZSTD_FRAMEHEADERSIZE_PREFIX);
+ { size_t const frameHeaderSize = ZSTD_frameHeaderSize_internal(
+ ip, ZSTD_FRAMEHEADERSIZE_PREFIX(dctx->format), dctx->format);
if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize,
- srcSize_wrong);
- FORWARD_IF_ERROR( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) );
+ srcSize_wrong, "");
+ FORWARD_IF_ERROR( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) , "");
ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize;
}
@@ -649,7 +642,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
ip += ZSTD_blockHeaderSize;
remainingSrcSize -= ZSTD_blockHeaderSize;
- RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong);
+ RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong, "");
switch(blockProperties.blockType)
{
@@ -664,13 +657,15 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
break;
case bt_reserved :
default:
- RETURN_ERROR(corruption_detected);
+ RETURN_ERROR(corruption_detected, "invalid block type");
}
if (ZSTD_isError(decodedSize)) return decodedSize;
if (dctx->fParams.checksumFlag)
XXH64_update(&dctx->xxhState, op, decodedSize);
- op += decodedSize;
+ if (decodedSize != 0)
+ op += decodedSize;
+ assert(ip != NULL);
ip += cBlockSize;
remainingSrcSize -= cBlockSize;
if (blockProperties.lastBlock) break;
@@ -678,14 +673,14 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) {
RETURN_ERROR_IF((U64)(op-ostart) != dctx->fParams.frameContentSize,
- corruption_detected);
+ corruption_detected, "");
}
if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */
U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState);
U32 checkRead;
- RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong);
+ RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong, "");
checkRead = MEM_readLE32(ip);
- RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong);
+ RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong, "");
ip += 4;
remainingSrcSize -= 4;
}
@@ -713,7 +708,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
dictSize = ZSTD_DDict_dictSize(ddict);
}
- while (srcSize >= ZSTD_FRAMEHEADERSIZE_PREFIX) {
+ while (srcSize >= ZSTD_startingInputLength(dctx->format)) {
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
if (ZSTD_isLegacy(src, srcSize)) {
@@ -742,7 +737,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
(unsigned)magicNumber, ZSTD_MAGICNUMBER);
if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
size_t const skippableSize = readSkippableFrameSize(src, srcSize);
- FORWARD_IF_ERROR(skippableSize);
+ FORWARD_IF_ERROR(skippableSize, "readSkippableFrameSize failed");
assert(skippableSize <= srcSize);
src = (const BYTE *)src + skippableSize;
@@ -752,11 +747,11 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
if (ddict) {
/* we were called from ZSTD_decompress_usingDDict */
- FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(dctx, ddict));
+ FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(dctx, ddict), "");
} else {
/* this will initialize correctly with no dict if dict == NULL, so
* use this in all cases but ddict */
- FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize));
+ FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize), "");
}
ZSTD_checkContinuity(dctx, dst);
@@ -777,7 +772,8 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
"error.");
if (ZSTD_isError(res)) return res;
assert(res <= dstCapacity);
- dst = (BYTE*)dst + res;
+ if (res != 0)
+ dst = (BYTE*)dst + res;
dstCapacity -= res;
}
moreThan1Frame = 1;
@@ -825,7 +821,7 @@ size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t sr
#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1)
size_t regenSize;
ZSTD_DCtx* const dctx = ZSTD_createDCtx();
- RETURN_ERROR_IF(dctx==NULL, memory_allocation);
+ RETURN_ERROR_IF(dctx==NULL, memory_allocation, "NULL pointer!");
regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize);
ZSTD_freeDCtx(dctx);
return regenSize;
@@ -843,6 +839,24 @@ size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t sr
****************************************/
size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; }
+/**
+ * Similar to ZSTD_nextSrcSizeToDecompress(), but when when a block input can be streamed,
+ * we allow taking a partial block as the input. Currently only raw uncompressed blocks can
+ * be streamed.
+ *
+ * For blocks that can be streamed, this allows us to reduce the latency until we produce
+ * output, and avoid copying the input.
+ *
+ * @param inputSize - The total amount of input that the caller currently has.
+ */
+static size_t ZSTD_nextSrcSizeToDecompressWithInputSize(ZSTD_DCtx* dctx, size_t inputSize) {
+ if (!(dctx->stage == ZSTDds_decompressBlock || dctx->stage == ZSTDds_decompressLastBlock))
+ return dctx->expected;
+ if (dctx->bType != bt_raw)
+ return dctx->expected;
+ return MIN(MAX(inputSize, 1), dctx->expected);
+}
+
ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) {
switch(dctx->stage)
{
@@ -875,7 +889,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
{
DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (unsigned)srcSize);
/* Sanity check */
- RETURN_ERROR_IF(srcSize != dctx->expected, srcSize_wrong, "not allowed");
+ RETURN_ERROR_IF(srcSize != ZSTD_nextSrcSizeToDecompressWithInputSize(dctx, srcSize), srcSize_wrong, "not allowed");
if (dstCapacity) ZSTD_checkContinuity(dctx, dst);
switch (dctx->stage)
@@ -900,7 +914,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
case ZSTDds_decodeFrameHeader:
assert(src != NULL);
memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize);
- FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize));
+ FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize), "");
dctx->expected = ZSTD_blockHeaderSize;
dctx->stage = ZSTDds_decodeBlockHeader;
return 0;
@@ -909,6 +923,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
{ blockProperties_t bp;
size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
if (ZSTD_isError(cBlockSize)) return cBlockSize;
+ RETURN_ERROR_IF(cBlockSize > dctx->fParams.blockSizeMax, corruption_detected, "Block Size Exceeds Maximum");
dctx->expected = cBlockSize;
dctx->bType = bp.blockType;
dctx->rleSize = bp.origSize;
@@ -941,28 +956,41 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
case bt_compressed:
DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed");
rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1);
+ dctx->expected = 0; /* Streaming not supported */
break;
case bt_raw :
+ assert(srcSize <= dctx->expected);
rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize);
+ FORWARD_IF_ERROR(rSize, "ZSTD_copyRawBlock failed");
+ assert(rSize == srcSize);
+ dctx->expected -= rSize;
break;
case bt_rle :
rSize = ZSTD_setRleBlock(dst, dstCapacity, *(const BYTE*)src, dctx->rleSize);
+ dctx->expected = 0; /* Streaming not supported */
break;
case bt_reserved : /* should never happen */
default:
- RETURN_ERROR(corruption_detected);
+ RETURN_ERROR(corruption_detected, "invalid block type");
}
- if (ZSTD_isError(rSize)) return rSize;
+ FORWARD_IF_ERROR(rSize, "");
+ RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum");
DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize);
dctx->decodedSize += rSize;
if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize);
+ dctx->previousDstEnd = (char*)dst + rSize;
+
+ /* Stay on the same stage until we are finished streaming the block. */
+ if (dctx->expected > 0) {
+ return rSize;
+ }
if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */
DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (unsigned)dctx->decodedSize);
RETURN_ERROR_IF(
dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
&& dctx->decodedSize != dctx->fParams.frameContentSize,
- corruption_detected);
+ corruption_detected, "");
if (dctx->fParams.checksumFlag) { /* another round for frame checksum */
dctx->expected = 4;
dctx->stage = ZSTDds_checkChecksum;
@@ -973,7 +1001,6 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
} else {
dctx->stage = ZSTDds_decodeBlockHeader;
dctx->expected = ZSTD_blockHeaderSize;
- dctx->previousDstEnd = (char*)dst + rSize;
}
return rSize;
}
@@ -983,7 +1010,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
{ U32 const h32 = (U32)XXH64_digest(&dctx->xxhState);
U32 const check32 = MEM_readLE32(src);
DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32);
- RETURN_ERROR_IF(check32 != h32, checksum_wrong);
+ RETURN_ERROR_IF(check32 != h32, checksum_wrong, "");
dctx->expected = 0;
dctx->stage = ZSTDds_getFrameHeaderSize;
return 0;
@@ -1004,7 +1031,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
default:
assert(0); /* impossible */
- RETURN_ERROR(GENERIC); /* some compiler require default to do something */
+ RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */
}
}
@@ -1015,6 +1042,10 @@ static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dict
dctx->virtualStart = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
dctx->prefixStart = dict;
dctx->previousDstEnd = (const char*)dict + dictSize;
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ dctx->dictContentBeginForFuzzing = dctx->prefixStart;
+ dctx->dictContentEndForFuzzing = dctx->previousDstEnd;
+#endif
return 0;
}
@@ -1028,7 +1059,7 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
const BYTE* dictPtr = (const BYTE*)dict;
const BYTE* const dictEnd = dictPtr + dictSize;
- RETURN_ERROR_IF(dictSize <= 8, dictionary_corrupted);
+ RETURN_ERROR_IF(dictSize <= 8, dictionary_corrupted, "dict is too small");
assert(MEM_readLE32(dict) == ZSTD_MAGIC_DICTIONARY); /* dict must be valid */
dictPtr += 8; /* skip header = magic + dictID */
@@ -1047,16 +1078,16 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
dictPtr, dictEnd - dictPtr,
workspace, workspaceSize);
#endif
- RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted);
+ RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted, "");
dictPtr += hSize;
}
{ short offcodeNCount[MaxOff+1];
unsigned offcodeMaxValue = MaxOff, offcodeLog;
size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
- RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted);
- RETURN_ERROR_IF(offcodeMaxValue > MaxOff, dictionary_corrupted);
- RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted);
+ RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, "");
+ RETURN_ERROR_IF(offcodeMaxValue > MaxOff, dictionary_corrupted, "");
+ RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, "");
ZSTD_buildFSETable( entropy->OFTable,
offcodeNCount, offcodeMaxValue,
OF_base, OF_bits,
@@ -1067,9 +1098,9 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
{ short matchlengthNCount[MaxML+1];
unsigned matchlengthMaxValue = MaxML, matchlengthLog;
size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
- RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted);
- RETURN_ERROR_IF(matchlengthMaxValue > MaxML, dictionary_corrupted);
- RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted);
+ RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, "");
+ RETURN_ERROR_IF(matchlengthMaxValue > MaxML, dictionary_corrupted, "");
+ RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, "");
ZSTD_buildFSETable( entropy->MLTable,
matchlengthNCount, matchlengthMaxValue,
ML_base, ML_bits,
@@ -1080,9 +1111,9 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
{ short litlengthNCount[MaxLL+1];
unsigned litlengthMaxValue = MaxLL, litlengthLog;
size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
- RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted);
- RETURN_ERROR_IF(litlengthMaxValue > MaxLL, dictionary_corrupted);
- RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted);
+ RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, "");
+ RETURN_ERROR_IF(litlengthMaxValue > MaxLL, dictionary_corrupted, "");
+ RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, "");
ZSTD_buildFSETable( entropy->LLTable,
litlengthNCount, litlengthMaxValue,
LL_base, LL_bits,
@@ -1090,13 +1121,13 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
dictPtr += litlengthHeaderSize;
}
- RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted);
+ RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, "");
{ int i;
size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12));
for (i=0; i<3; i++) {
U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4;
- RETURN_ERROR_IF(rep==0 || rep >= dictContentSize,
- dictionary_corrupted);
+ RETURN_ERROR_IF(rep==0 || rep > dictContentSize,
+ dictionary_corrupted, "");
entropy->rep[i] = rep;
} }
@@ -1114,7 +1145,7 @@ static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict
/* load entropy tables */
{ size_t const eSize = ZSTD_loadDEntropy(&dctx->entropy, dict, dictSize);
- RETURN_ERROR_IF(ZSTD_isError(eSize), dictionary_corrupted);
+ RETURN_ERROR_IF(ZSTD_isError(eSize), dictionary_corrupted, "");
dict = (const char*)dict + eSize;
dictSize -= eSize;
}
@@ -1137,6 +1168,7 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
dctx->litEntropy = dctx->fseEntropy = 0;
dctx->dictID = 0;
+ dctx->bType = bt_reserved;
ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue));
memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */
dctx->LLTptr = dctx->entropy.LLTable;
@@ -1148,11 +1180,11 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
{
- FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) );
+ FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , "");
if (dict && dictSize)
RETURN_ERROR_IF(
ZSTD_isError(ZSTD_decompress_insertDictionary(dctx, dict, dictSize)),
- dictionary_corrupted);
+ dictionary_corrupted, "");
return 0;
}
@@ -1171,7 +1203,7 @@ size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
DEBUGLOG(4, "DDict is %s",
dctx->ddictIsCold ? "~cold~" : "hot!");
}
- FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) );
+ FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , "");
if (ddict) { /* NULL ddict is equivalent to no dictionary */
ZSTD_copyDDictParameters(dctx, ddict);
}
@@ -1262,11 +1294,11 @@ size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx,
ZSTD_dictLoadMethod_e dictLoadMethod,
ZSTD_dictContentType_e dictContentType)
{
- RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong);
+ RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
ZSTD_clearDict(dctx);
- if (dict && dictSize >= 8) {
+ if (dict && dictSize != 0) {
dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem);
- RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation);
+ RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation, "NULL pointer!");
dctx->ddict = dctx->ddictLocal;
dctx->dictUses = ZSTD_use_indefinitely;
}
@@ -1285,7 +1317,7 @@ size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSi
size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType)
{
- FORWARD_IF_ERROR(ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType));
+ FORWARD_IF_ERROR(ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType), "");
dctx->dictUses = ZSTD_use_once;
return 0;
}
@@ -1297,14 +1329,14 @@ size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSiz
/* ZSTD_initDStream_usingDict() :
- * return : expected size, aka ZSTD_FRAMEHEADERSIZE_PREFIX.
+ * return : expected size, aka ZSTD_startingInputLength().
* this function cannot fail */
size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize)
{
DEBUGLOG(4, "ZSTD_initDStream_usingDict");
- FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) );
- FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) );
- return ZSTD_FRAMEHEADERSIZE_PREFIX;
+ FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) , "");
+ FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) , "");
+ return ZSTD_startingInputLength(zds->format);
}
/* note : this variant can't fail */
@@ -1319,24 +1351,24 @@ size_t ZSTD_initDStream(ZSTD_DStream* zds)
* this function cannot fail */
size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict)
{
- FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) );
- FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) );
- return ZSTD_FRAMEHEADERSIZE_PREFIX;
+ FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) , "");
+ FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) , "");
+ return ZSTD_startingInputLength(dctx->format);
}
/* ZSTD_resetDStream() :
- * return : expected size, aka ZSTD_FRAMEHEADERSIZE_PREFIX.
+ * return : expected size, aka ZSTD_startingInputLength().
* this function cannot fail */
size_t ZSTD_resetDStream(ZSTD_DStream* dctx)
{
- FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only));
- return ZSTD_FRAMEHEADERSIZE_PREFIX;
+ FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only), "");
+ return ZSTD_startingInputLength(dctx->format);
}
size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
{
- RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong);
+ RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
ZSTD_clearDict(dctx);
if (ddict) {
dctx->ddict = ddict;
@@ -1353,9 +1385,9 @@ size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize)
ZSTD_bounds const bounds = ZSTD_dParam_getBounds(ZSTD_d_windowLogMax);
size_t const min = (size_t)1 << bounds.lowerBound;
size_t const max = (size_t)1 << bounds.upperBound;
- RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong);
- RETURN_ERROR_IF(maxWindowSize < min, parameter_outOfBound);
- RETURN_ERROR_IF(maxWindowSize > max, parameter_outOfBound);
+ RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
+ RETURN_ERROR_IF(maxWindowSize < min, parameter_outOfBound, "");
+ RETURN_ERROR_IF(maxWindowSize > max, parameter_outOfBound, "");
dctx->maxWindowSize = maxWindowSize;
return 0;
}
@@ -1378,6 +1410,10 @@ ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam)
bounds.upperBound = (int)ZSTD_f_zstd1_magicless;
ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless);
return bounds;
+ case ZSTD_d_stableOutBuffer:
+ bounds.lowerBound = (int)ZSTD_obm_buffered;
+ bounds.upperBound = (int)ZSTD_obm_stable;
+ return bounds;
default:;
}
bounds.error = ERROR(parameter_unsupported);
@@ -1397,12 +1433,12 @@ static int ZSTD_dParam_withinBounds(ZSTD_dParameter dParam, int value)
}
#define CHECK_DBOUNDS(p,v) { \
- RETURN_ERROR_IF(!ZSTD_dParam_withinBounds(p, v), parameter_outOfBound); \
+ RETURN_ERROR_IF(!ZSTD_dParam_withinBounds(p, v), parameter_outOfBound, ""); \
}
size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value)
{
- RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong);
+ RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
switch(dParam) {
case ZSTD_d_windowLogMax:
if (value == 0) value = ZSTD_WINDOWLOG_LIMIT_DEFAULT;
@@ -1413,9 +1449,13 @@ size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value
CHECK_DBOUNDS(ZSTD_d_format, value);
dctx->format = (ZSTD_format_e)value;
return 0;
+ case ZSTD_d_stableOutBuffer:
+ CHECK_DBOUNDS(ZSTD_d_stableOutBuffer, value);
+ dctx->outBufferMode = (ZSTD_outBufferMode_e)value;
+ return 0;
default:;
}
- RETURN_ERROR(parameter_unsupported);
+ RETURN_ERROR(parameter_unsupported, "");
}
size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset)
@@ -1427,7 +1467,7 @@ size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset)
}
if ( (reset == ZSTD_reset_parameters)
|| (reset == ZSTD_reset_session_and_parameters) ) {
- RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong);
+ RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
ZSTD_clearDict(dctx);
dctx->format = ZSTD_f_zstd1;
dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
@@ -1448,7 +1488,7 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long
unsigned long long const neededSize = MIN(frameContentSize, neededRBSize);
size_t const minRBSize = (size_t) neededSize;
RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize,
- frameParameter_windowTooLarge);
+ frameParameter_windowTooLarge, "");
return minRBSize;
}
@@ -1466,30 +1506,94 @@ size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize)
ZSTD_frameHeader zfh;
size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize);
if (ZSTD_isError(err)) return err;
- RETURN_ERROR_IF(err>0, srcSize_wrong);
+ RETURN_ERROR_IF(err>0, srcSize_wrong, "");
RETURN_ERROR_IF(zfh.windowSize > windowSizeMax,
- frameParameter_windowTooLarge);
+ frameParameter_windowTooLarge, "");
return ZSTD_estimateDStreamSize((size_t)zfh.windowSize);
}
/* ***** Decompression ***** */
-MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+static int ZSTD_DCtx_isOverflow(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize)
{
- size_t const length = MIN(dstCapacity, srcSize);
- memcpy(dst, src, length);
- return length;
+ return (zds->inBuffSize + zds->outBuffSize) >= (neededInBuffSize + neededOutBuffSize) * ZSTD_WORKSPACETOOLARGE_FACTOR;
+}
+
+static void ZSTD_DCtx_updateOversizedDuration(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize)
+{
+ if (ZSTD_DCtx_isOverflow(zds, neededInBuffSize, neededOutBuffSize))
+ zds->oversizedDuration++;
+ else
+ zds->oversizedDuration = 0;
+}
+
+static int ZSTD_DCtx_isOversizedTooLong(ZSTD_DStream* zds)
+{
+ return zds->oversizedDuration >= ZSTD_WORKSPACETOOLARGE_MAXDURATION;
+}
+
+/* Checks that the output buffer hasn't changed if ZSTD_obm_stable is used. */
+static size_t ZSTD_checkOutBuffer(ZSTD_DStream const* zds, ZSTD_outBuffer const* output)
+{
+ ZSTD_outBuffer const expect = zds->expectedOutBuffer;
+ /* No requirement when ZSTD_obm_stable is not enabled. */
+ if (zds->outBufferMode != ZSTD_obm_stable)
+ return 0;
+ /* Any buffer is allowed in zdss_init, this must be the same for every other call until
+ * the context is reset.
+ */
+ if (zds->streamStage == zdss_init)
+ return 0;
+ /* The buffer must match our expectation exactly. */
+ if (expect.dst == output->dst && expect.pos == output->pos && expect.size == output->size)
+ return 0;
+ RETURN_ERROR(dstBuffer_wrong, "ZSTD_obm_stable enabled but output differs!");
}
+/* Calls ZSTD_decompressContinue() with the right parameters for ZSTD_decompressStream()
+ * and updates the stage and the output buffer state. This call is extracted so it can be
+ * used both when reading directly from the ZSTD_inBuffer, and in buffered input mode.
+ * NOTE: You must break after calling this function since the streamStage is modified.
+ */
+static size_t ZSTD_decompressContinueStream(
+ ZSTD_DStream* zds, char** op, char* oend,
+ void const* src, size_t srcSize) {
+ int const isSkipFrame = ZSTD_isSkipFrame(zds);
+ if (zds->outBufferMode == ZSTD_obm_buffered) {
+ size_t const dstSize = isSkipFrame ? 0 : zds->outBuffSize - zds->outStart;
+ size_t const decodedSize = ZSTD_decompressContinue(zds,
+ zds->outBuff + zds->outStart, dstSize, src, srcSize);
+ FORWARD_IF_ERROR(decodedSize, "");
+ if (!decodedSize && !isSkipFrame) {
+ zds->streamStage = zdss_read;
+ } else {
+ zds->outEnd = zds->outStart + decodedSize;
+ zds->streamStage = zdss_flush;
+ }
+ } else {
+ /* Write directly into the output buffer */
+ size_t const dstSize = isSkipFrame ? 0 : oend - *op;
+ size_t const decodedSize = ZSTD_decompressContinue(zds, *op, dstSize, src, srcSize);
+ FORWARD_IF_ERROR(decodedSize, "");
+ *op += decodedSize;
+ /* Flushing is not needed. */
+ zds->streamStage = zdss_read;
+ assert(*op <= oend);
+ assert(zds->outBufferMode == ZSTD_obm_stable);
+ }
+ return 0;
+}
size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
{
- const char* const istart = (const char*)(input->src) + input->pos;
- const char* const iend = (const char*)(input->src) + input->size;
+ const char* const src = (const char*)input->src;
+ const char* const istart = input->pos != 0 ? src + input->pos : src;
+ const char* const iend = input->size != 0 ? src + input->size : src;
const char* ip = istart;
- char* const ostart = (char*)(output->dst) + output->pos;
- char* const oend = (char*)(output->dst) + output->size;
+ char* const dst = (char*)output->dst;
+ char* const ostart = output->pos != 0 ? dst + output->pos : dst;
+ char* const oend = output->size != 0 ? dst + output->size : dst;
char* op = ostart;
U32 someMoreWork = 1;
@@ -1505,6 +1609,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
"forbidden. out: pos: %u vs size: %u",
(U32)output->pos, (U32)output->size);
DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos));
+ FORWARD_IF_ERROR(ZSTD_checkOutBuffer(zds, output), "");
while (someMoreWork) {
switch(zds->streamStage)
@@ -1515,6 +1620,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
zds->legacyVersion = 0;
zds->hostageByte = 0;
+ zds->expectedOutBuffer = *output;
/* fall-through */
case zdss_loadHeader :
@@ -1542,7 +1648,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
"legacy support is incompatible with static dctx");
FORWARD_IF_ERROR(ZSTD_initLegacyStream(&zds->legacyContext,
zds->previousLegacyVersion, legacyVersion,
- dict, dictSize));
+ dict, dictSize), "");
zds->legacyVersion = zds->previousLegacyVersion = legacyVersion;
{ size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, legacyVersion, output, input);
if (hint==0) zds->streamStage = zdss_init; /* or stay in stage zdss_loadHeader */
@@ -1561,7 +1667,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
zds->lhSize += remainingInput;
}
input->pos = input->size;
- return (MAX(ZSTD_FRAMEHEADERSIZE_MIN, hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */
+ return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */
}
assert(ip != NULL);
memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad;
@@ -1569,7 +1675,8 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
} }
/* check for single-pass mode opportunity */
- if (zds->fParams.frameContentSize && zds->fParams.windowSize /* skippable frame if == 0 */
+ if (zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
+ && zds->fParams.frameType != ZSTD_skippableFrame
&& (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) {
size_t const cSize = ZSTD_findFrameCompressedSize(istart, iend-istart);
if (cSize <= (size_t)(iend-istart)) {
@@ -1585,15 +1692,23 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
break;
} }
+ /* Check output buffer is large enough for ZSTD_odm_stable. */
+ if (zds->outBufferMode == ZSTD_obm_stable
+ && zds->fParams.frameType != ZSTD_skippableFrame
+ && zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
+ && (U64)(size_t)(oend-op) < zds->fParams.frameContentSize) {
+ RETURN_ERROR(dstSize_tooSmall, "ZSTD_obm_stable passed but ZSTD_outBuffer is too small");
+ }
+
/* Consume header (see ZSTDds_decodeFrameHeader) */
DEBUGLOG(4, "Consume header");
- FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)));
+ FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), "");
if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */
zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE);
zds->stage = ZSTDds_skipFrame;
} else {
- FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize));
+ FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize), "");
zds->expected = ZSTD_blockHeaderSize;
zds->stage = ZSTDds_decodeBlockHeader;
}
@@ -1604,40 +1719,48 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
(U32)(zds->maxWindowSize >> 10) );
zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN);
RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize,
- frameParameter_windowTooLarge);
+ frameParameter_windowTooLarge, "");
/* Adapt buffer sizes to frame header instructions */
{ size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */);
- size_t const neededOutBuffSize = ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize);
- if ((zds->inBuffSize < neededInBuffSize) || (zds->outBuffSize < neededOutBuffSize)) {
- size_t const bufferSize = neededInBuffSize + neededOutBuffSize;
- DEBUGLOG(4, "inBuff : from %u to %u",
- (U32)zds->inBuffSize, (U32)neededInBuffSize);
- DEBUGLOG(4, "outBuff : from %u to %u",
- (U32)zds->outBuffSize, (U32)neededOutBuffSize);
- if (zds->staticSize) { /* static DCtx */
- DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize);
- assert(zds->staticSize >= sizeof(ZSTD_DCtx)); /* controlled at init */
- RETURN_ERROR_IF(
- bufferSize > zds->staticSize - sizeof(ZSTD_DCtx),
- memory_allocation);
- } else {
- ZSTD_free(zds->inBuff, zds->customMem);
- zds->inBuffSize = 0;
- zds->outBuffSize = 0;
- zds->inBuff = (char*)ZSTD_malloc(bufferSize, zds->customMem);
- RETURN_ERROR_IF(zds->inBuff == NULL, memory_allocation);
- }
- zds->inBuffSize = neededInBuffSize;
- zds->outBuff = zds->inBuff + zds->inBuffSize;
- zds->outBuffSize = neededOutBuffSize;
- } }
+ size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_obm_buffered
+ ? ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize)
+ : 0;
+
+ ZSTD_DCtx_updateOversizedDuration(zds, neededInBuffSize, neededOutBuffSize);
+
+ { int const tooSmall = (zds->inBuffSize < neededInBuffSize) || (zds->outBuffSize < neededOutBuffSize);
+ int const tooLarge = ZSTD_DCtx_isOversizedTooLong(zds);
+
+ if (tooSmall || tooLarge) {
+ size_t const bufferSize = neededInBuffSize + neededOutBuffSize;
+ DEBUGLOG(4, "inBuff : from %u to %u",
+ (U32)zds->inBuffSize, (U32)neededInBuffSize);
+ DEBUGLOG(4, "outBuff : from %u to %u",
+ (U32)zds->outBuffSize, (U32)neededOutBuffSize);
+ if (zds->staticSize) { /* static DCtx */
+ DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize);
+ assert(zds->staticSize >= sizeof(ZSTD_DCtx)); /* controlled at init */
+ RETURN_ERROR_IF(
+ bufferSize > zds->staticSize - sizeof(ZSTD_DCtx),
+ memory_allocation, "");
+ } else {
+ ZSTD_free(zds->inBuff, zds->customMem);
+ zds->inBuffSize = 0;
+ zds->outBuffSize = 0;
+ zds->inBuff = (char*)ZSTD_malloc(bufferSize, zds->customMem);
+ RETURN_ERROR_IF(zds->inBuff == NULL, memory_allocation, "");
+ }
+ zds->inBuffSize = neededInBuffSize;
+ zds->outBuff = zds->inBuff + zds->inBuffSize;
+ zds->outBuffSize = neededOutBuffSize;
+ } } }
zds->streamStage = zdss_read;
/* fall-through */
case zdss_read:
DEBUGLOG(5, "stage zdss_read");
- { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds);
+ { size_t const neededInSize = ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip);
DEBUGLOG(5, "neededInSize = %u", (U32)neededInSize);
if (neededInSize==0) { /* end of frame */
zds->streamStage = zdss_init;
@@ -1645,15 +1768,9 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
break;
}
if ((size_t)(iend-ip) >= neededInSize) { /* decode directly from src */
- int const isSkipFrame = ZSTD_isSkipFrame(zds);
- size_t const decodedSize = ZSTD_decompressContinue(zds,
- zds->outBuff + zds->outStart, (isSkipFrame ? 0 : zds->outBuffSize - zds->outStart),
- ip, neededInSize);
- if (ZSTD_isError(decodedSize)) return decodedSize;
+ FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, ip, neededInSize), "");
ip += neededInSize;
- if (!decodedSize && !isSkipFrame) break; /* this was just a header */
- zds->outEnd = zds->outStart + decodedSize;
- zds->streamStage = zdss_flush;
+ /* Function modifies the stage so we must break */
break;
} }
if (ip==iend) { someMoreWork = 0; break; } /* no more input */
@@ -1665,6 +1782,8 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
size_t const toLoad = neededInSize - zds->inPos;
int const isSkipFrame = ZSTD_isSkipFrame(zds);
size_t loadedSize;
+ /* At this point we shouldn't be decompressing a block that we can stream. */
+ assert(neededInSize == ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip));
if (isSkipFrame) {
loadedSize = MIN(toLoad, (size_t)(iend-ip));
} else {
@@ -1678,17 +1797,11 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
if (loadedSize < toLoad) { someMoreWork = 0; break; } /* not enough input, wait for more */
/* decode loaded input */
- { size_t const decodedSize = ZSTD_decompressContinue(zds,
- zds->outBuff + zds->outStart, zds->outBuffSize - zds->outStart,
- zds->inBuff, neededInSize);
- if (ZSTD_isError(decodedSize)) return decodedSize;
- zds->inPos = 0; /* input is consumed */
- if (!decodedSize && !isSkipFrame) { zds->streamStage = zdss_read; break; } /* this was just a header */
- zds->outEnd = zds->outStart + decodedSize;
- } }
- zds->streamStage = zdss_flush;
- /* fall-through */
-
+ zds->inPos = 0; /* input is consumed */
+ FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, zds->inBuff, neededInSize), "");
+ /* Function modifies the stage so we must break */
+ break;
+ }
case zdss_flush:
{ size_t const toFlushSize = zds->outEnd - zds->outStart;
size_t const flushedSize = ZSTD_limitCopy(op, oend-op, zds->outBuff + zds->outStart, toFlushSize);
@@ -1711,17 +1824,21 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
default:
assert(0); /* impossible */
- RETURN_ERROR(GENERIC); /* some compiler require default to do something */
+ RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */
} }
/* result */
input->pos = (size_t)(ip - (const char*)(input->src));
output->pos = (size_t)(op - (char*)(output->dst));
+
+ /* Update the expected output buffer for ZSTD_obm_stable. */
+ zds->expectedOutBuffer = *output;
+
if ((ip==istart) && (op==ostart)) { /* no forward progress */
zds->noForwardProgress ++;
if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) {
- RETURN_ERROR_IF(op==oend, dstSize_tooSmall);
- RETURN_ERROR_IF(ip==iend, srcSize_wrong);
+ RETURN_ERROR_IF(op==oend, dstSize_tooSmall, "");
+ RETURN_ERROR_IF(ip==iend, srcSize_wrong, "");
assert(0);
}
} else {
diff --git a/zstd/lib/decompress/zstd_decompress_block.c b/zstd/lib/decompress/zstd_decompress_block.c
index a2a7eed..ad3b3d8 100644
--- a/zstd/lib/decompress/zstd_decompress_block.c
+++ b/zstd/lib/decompress/zstd_decompress_block.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -15,14 +15,14 @@
* Dependencies
*********************************************************/
#include <string.h> /* memcpy, memmove, memset */
-#include "compiler.h" /* prefetch */
-#include "cpu.h" /* bmi2 */
-#include "mem.h" /* low level memory routines */
+#include "../common/compiler.h" /* prefetch */
+#include "../common/cpu.h" /* bmi2 */
+#include "../common/mem.h" /* low level memory routines */
#define FSE_STATIC_LINKING_ONLY
-#include "fse.h"
+#include "../common/fse.h"
#define HUF_STATIC_LINKING_ONLY
-#include "huf.h"
-#include "zstd_internal.h"
+#include "../common/huf.h"
+#include "../common/zstd_internal.h"
#include "zstd_decompress_internal.h" /* ZSTD_DCtx */
#include "zstd_ddict.h" /* ZSTD_DDictDictContent */
#include "zstd_decompress_block.h"
@@ -56,7 +56,7 @@ static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
blockProperties_t* bpPtr)
{
- RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong);
+ RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, "");
{ U32 const cBlockHeader = MEM_readLE24(src);
U32 const cSize = cBlockHeader >> 3;
@@ -64,7 +64,7 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
bpPtr->origSize = cSize; /* only useful for RLE */
if (bpPtr->blockType == bt_rle) return 1;
- RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected);
+ RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, "");
return cSize;
}
}
@@ -79,7 +79,8 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */
{
- RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected);
+ DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
+ RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");
{ const BYTE* const istart = (const BYTE*) src;
symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
@@ -87,7 +88,8 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
switch(litEncType)
{
case set_repeat:
- RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted);
+ DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
+ RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, "");
/* fall-through */
case set_compressed:
@@ -116,11 +118,11 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
/* 2 - 2 - 18 - 18 */
lhSize = 5;
litSize = (lhc >> 4) & 0x3FFFF;
- litCSize = (lhc >> 22) + (istart[4] << 10);
+ litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
break;
}
- RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected);
- RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected);
+ RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
+ RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
/* prefetch huffman table if cold */
if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
@@ -158,7 +160,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
}
}
- RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected);
+ RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
dctx->litPtr = dctx->litBuffer;
dctx->litSize = litSize;
@@ -188,7 +190,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
}
if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
- RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected);
+ RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
memcpy(dctx->litBuffer, istart+lhSize, litSize);
dctx->litPtr = dctx->litBuffer;
dctx->litSize = litSize;
@@ -220,7 +222,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
break;
}
- RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected);
+ RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
dctx->litPtr = dctx->litBuffer;
dctx->litSize = litSize;
@@ -391,7 +393,8 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
symbolNext[s] = 1;
} else {
if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
- symbolNext[s] = normalizedCounter[s];
+ assert(normalizedCounter[s]>=0);
+ symbolNext[s] = (U16)normalizedCounter[s];
} } }
memcpy(dt, &DTableH, sizeof(DTableH));
}
@@ -437,8 +440,8 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
switch(type)
{
case set_rle :
- RETURN_ERROR_IF(!srcSize, srcSize_wrong);
- RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected);
+ RETURN_ERROR_IF(!srcSize, srcSize_wrong, "");
+ RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, "");
{ U32 const symbol = *(const BYTE*)src;
U32 const baseline = baseValue[symbol];
U32 const nbBits = nbAdditionalBits[symbol];
@@ -450,7 +453,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
*DTablePtr = defaultTable;
return 0;
case set_repeat:
- RETURN_ERROR_IF(!flagRepeatTable, corruption_detected);
+ RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, "");
/* prefetch FSE table if used */
if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
const void* const pStart = *DTablePtr;
@@ -462,8 +465,8 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
{ unsigned tableLog;
S16 norm[MaxSeq+1];
size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
- RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected);
- RETURN_ERROR_IF(tableLog > maxLog, corruption_detected);
+ RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
+ RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog);
*DTablePtr = DTableSpace;
return headerSize;
@@ -484,28 +487,28 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
/* check */
- RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong);
+ RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, "");
/* SeqHead */
nbSeq = *ip++;
if (!nbSeq) {
*nbSeqPtr=0;
- RETURN_ERROR_IF(srcSize != 1, srcSize_wrong);
+ RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, "");
return 1;
}
if (nbSeq > 0x7F) {
if (nbSeq == 0xFF) {
- RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong);
+ RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
} else {
- RETURN_ERROR_IF(ip >= iend, srcSize_wrong);
+ RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
nbSeq = ((nbSeq-0x80)<<8) + *ip++;
}
}
*nbSeqPtr = nbSeq;
/* FSE table descriptors */
- RETURN_ERROR_IF(ip+4 > iend, srcSize_wrong); /* minimum possible size */
+ RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
{ symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
@@ -518,7 +521,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
LL_base, LL_bits,
LL_defaultDTable, dctx->fseEntropy,
dctx->ddictIsCold, nbSeq);
- RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected);
+ RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
ip += llhSize;
}
@@ -528,7 +531,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
OF_base, OF_bits,
OF_defaultDTable, dctx->fseEntropy,
dctx->ddictIsCold, nbSeq);
- RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected);
+ RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
ip += ofhSize;
}
@@ -538,7 +541,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
ML_base, ML_bits,
ML_defaultDTable, dctx->fseEntropy,
dctx->ddictIsCold, nbSeq);
- RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected);
+ RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
ip += mlhSize;
}
}
@@ -570,38 +573,118 @@ typedef struct {
size_t pos;
} seqState_t;
+/*! ZSTD_overlapCopy8() :
+ * Copies 8 bytes from ip to op and updates op and ip where ip <= op.
+ * If the offset is < 8 then the offset is spread to at least 8 bytes.
+ *
+ * Precondition: *ip <= *op
+ * Postcondition: *op - *op >= 8
+ */
+HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
+ assert(*ip <= *op);
+ if (offset < 8) {
+ /* close range match, overlap */
+ static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
+ static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
+ int const sub2 = dec64table[offset];
+ (*op)[0] = (*ip)[0];
+ (*op)[1] = (*ip)[1];
+ (*op)[2] = (*ip)[2];
+ (*op)[3] = (*ip)[3];
+ *ip += dec32table[offset];
+ ZSTD_copy4(*op+4, *ip);
+ *ip -= sub2;
+ } else {
+ ZSTD_copy8(*op, *ip);
+ }
+ *ip += 8;
+ *op += 8;
+ assert(*op - *ip >= 8);
+}
+
+/*! ZSTD_safecopy() :
+ * Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
+ * and write up to 16 bytes past oend_w (op >= oend_w is allowed).
+ * This function is only called in the uncommon case where the sequence is near the end of the block. It
+ * should be fast for a single long sequence, but can be slow for several short sequences.
+ *
+ * @param ovtype controls the overlap detection
+ * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
+ * - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
+ * The src buffer must be before the dst buffer.
+ */
+static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
+ ptrdiff_t const diff = op - ip;
+ BYTE* const oend = op + length;
+
+ assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) ||
+ (ovtype == ZSTD_overlap_src_before_dst && diff >= 0));
-/* ZSTD_execSequenceLast7():
- * exceptional case : decompress a match starting within last 7 bytes of output buffer.
- * requires more careful checks, to ensure there is no overflow.
- * performance does not matter though.
- * note : this case is supposed to be never generated "naturally" by reference encoder,
- * since in most cases it needs at least 8 bytes to look for a match.
- * but it's allowed by the specification. */
+ if (length < 8) {
+ /* Handle short lengths. */
+ while (op < oend) *op++ = *ip++;
+ return;
+ }
+ if (ovtype == ZSTD_overlap_src_before_dst) {
+ /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
+ assert(length >= 8);
+ ZSTD_overlapCopy8(&op, &ip, diff);
+ assert(op - ip >= 8);
+ assert(op <= oend);
+ }
+
+ if (oend <= oend_w) {
+ /* No risk of overwrite. */
+ ZSTD_wildcopy(op, ip, length, ovtype);
+ return;
+ }
+ if (op <= oend_w) {
+ /* Wildcopy until we get close to the end. */
+ assert(oend > oend_w);
+ ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
+ ip += oend_w - op;
+ op = oend_w;
+ }
+ /* Handle the leftovers. */
+ while (op < oend) *op++ = *ip++;
+}
+
+/* ZSTD_execSequenceEnd():
+ * This version handles cases that are near the end of the output buffer. It requires
+ * more careful checks to make sure there is no overflow. By separating out these hard
+ * and unlikely cases, we can speed up the common cases.
+ *
+ * NOTE: This function needs to be fast for a single long sequence, but doesn't need
+ * to be optimized for many small sequences, since those fall into ZSTD_execSequence().
+ */
FORCE_NOINLINE
-size_t ZSTD_execSequenceLast7(BYTE* op,
- BYTE* const oend, seq_t sequence,
- const BYTE** litPtr, const BYTE* const litLimit,
- const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
+size_t ZSTD_execSequenceEnd(BYTE* op,
+ BYTE* const oend, seq_t sequence,
+ const BYTE** litPtr, const BYTE* const litLimit,
+ const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
{
BYTE* const oLitEnd = op + sequence.litLength;
size_t const sequenceLength = sequence.litLength + sequence.matchLength;
- BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
const BYTE* const iLitEnd = *litPtr + sequence.litLength;
const BYTE* match = oLitEnd - sequence.offset;
+ BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
- /* check */
- RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must fit within dstBuffer");
- RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "try to read beyond literal buffer");
+ /* bounds checks : careful of address space overflow in 32-bit mode */
+ RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
+ RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
+ assert(op < op + sequenceLength);
+ assert(oLitEnd < op + sequenceLength);
/* copy literals */
- while (op < oLitEnd) *op++ = *(*litPtr)++;
+ ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
+ op = oLitEnd;
+ *litPtr = iLitEnd;
/* copy Match */
- if (sequence.offset > (size_t)(oLitEnd - base)) {
+ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
/* offset beyond prefix */
- RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - vBase),corruption_detected);
- match = dictEnd - (base-match);
+ RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
+ match = dictEnd - (prefixStart-match);
if (match + sequence.matchLength <= dictEnd) {
memmove(oLitEnd, match, sequence.matchLength);
return sequenceLength;
@@ -611,13 +694,12 @@ size_t ZSTD_execSequenceLast7(BYTE* op,
memmove(oLitEnd, match, length1);
op = oLitEnd + length1;
sequence.matchLength -= length1;
- match = base;
+ match = prefixStart;
} }
- while (op < oMatchEnd) *op++ = *match++;
+ ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
return sequenceLength;
}
-
HINT_INLINE
size_t ZSTD_execSequence(BYTE* op,
BYTE* const oend, seq_t sequence,
@@ -627,26 +709,47 @@ size_t ZSTD_execSequence(BYTE* op,
BYTE* const oLitEnd = op + sequence.litLength;
size_t const sequenceLength = sequence.litLength + sequence.matchLength;
BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
- BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
+ BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; /* risk : address space underflow on oend=NULL */
const BYTE* const iLitEnd = *litPtr + sequence.litLength;
const BYTE* match = oLitEnd - sequence.offset;
- /* check */
- RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend");
- RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer");
- if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
-
- /* copy Literals */
- ZSTD_copy8(op, *litPtr);
- if (sequence.litLength > 8)
- ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
+ assert(op != NULL /* Precondition */);
+ assert(oend_w < oend /* No underflow */);
+ /* Handle edge cases in a slow path:
+ * - Read beyond end of literals
+ * - Match end is within WILDCOPY_OVERLIMIT of oend
+ * - 32-bit mode and the match length overflows
+ */
+ if (UNLIKELY(
+ iLitEnd > litLimit ||
+ oMatchEnd > oend_w ||
+ (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
+ return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
+
+ /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
+ assert(op <= oLitEnd /* No overflow */);
+ assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
+ assert(oMatchEnd <= oend /* No underflow */);
+ assert(iLitEnd <= litLimit /* Literal length is in bounds */);
+ assert(oLitEnd <= oend_w /* Can wildcopy literals */);
+ assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
+
+ /* Copy Literals:
+ * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
+ * We likely don't need the full 32-byte wildcopy.
+ */
+ assert(WILDCOPY_OVERLENGTH >= 16);
+ ZSTD_copy16(op, (*litPtr));
+ if (UNLIKELY(sequence.litLength > 16)) {
+ ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
+ }
op = oLitEnd;
*litPtr = iLitEnd; /* update for next sequence */
- /* copy Match */
+ /* Copy Match */
if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
/* offset beyond prefix -> go into extDict */
- RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected);
+ RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
match = dictEnd + (match - prefixStart);
if (match + sequence.matchLength <= dictEnd) {
memmove(oLitEnd, match, sequence.matchLength);
@@ -658,121 +761,33 @@ size_t ZSTD_execSequence(BYTE* op,
op = oLitEnd + length1;
sequence.matchLength -= length1;
match = prefixStart;
- if (op > oend_w || sequence.matchLength < MINMATCH) {
- U32 i;
- for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
- return sequenceLength;
- }
} }
- /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */
-
- /* match within prefix */
- if (sequence.offset < 8) {
- /* close range match, overlap */
- static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
- static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
- int const sub2 = dec64table[sequence.offset];
- op[0] = match[0];
- op[1] = match[1];
- op[2] = match[2];
- op[3] = match[3];
- match += dec32table[sequence.offset];
- ZSTD_copy4(op+4, match);
- match -= sub2;
- } else {
- ZSTD_copy8(op, match);
- }
- op += 8; match += 8;
-
- if (oMatchEnd > oend-(16-MINMATCH)) {
- if (op < oend_w) {
- ZSTD_wildcopy(op, match, oend_w - op);
- match += oend_w - op;
- op = oend_w;
- }
- while (op < oMatchEnd) *op++ = *match++;
- } else {
- ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
+ /* Match within prefix of 1 or more bytes */
+ assert(op <= oMatchEnd);
+ assert(oMatchEnd <= oend_w);
+ assert(match >= prefixStart);
+ assert(sequence.matchLength >= 1);
+
+ /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
+ * without overlap checking.
+ */
+ if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
+ /* We bet on a full wildcopy for matches, since we expect matches to be
+ * longer than literals (in general). In silesia, ~10% of matches are longer
+ * than 16 bytes.
+ */
+ ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
+ return sequenceLength;
}
- return sequenceLength;
-}
-
-
-HINT_INLINE
-size_t ZSTD_execSequenceLong(BYTE* op,
- BYTE* const oend, seq_t sequence,
- const BYTE** litPtr, const BYTE* const litLimit,
- const BYTE* const prefixStart, const BYTE* const dictStart, const BYTE* const dictEnd)
-{
- BYTE* const oLitEnd = op + sequence.litLength;
- size_t const sequenceLength = sequence.litLength + sequence.matchLength;
- BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
- BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
- const BYTE* const iLitEnd = *litPtr + sequence.litLength;
- const BYTE* match = sequence.match;
+ assert(sequence.offset < WILDCOPY_VECLEN);
- /* check */
- RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend");
- RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer");
- if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);
-
- /* copy Literals */
- ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */
- if (sequence.litLength > 8)
- ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
- op = oLitEnd;
- *litPtr = iLitEnd; /* update for next sequence */
+ /* Copy 8 bytes and spread the offset to be >= 8. */
+ ZSTD_overlapCopy8(&op, &match, sequence.offset);
- /* copy Match */
- if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
- /* offset beyond prefix */
- RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - dictStart), corruption_detected);
- if (match + sequence.matchLength <= dictEnd) {
- memmove(oLitEnd, match, sequence.matchLength);
- return sequenceLength;
- }
- /* span extDict & currentPrefixSegment */
- { size_t const length1 = dictEnd - match;
- memmove(oLitEnd, match, length1);
- op = oLitEnd + length1;
- sequence.matchLength -= length1;
- match = prefixStart;
- if (op > oend_w || sequence.matchLength < MINMATCH) {
- U32 i;
- for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
- return sequenceLength;
- }
- } }
- assert(op <= oend_w);
- assert(sequence.matchLength >= MINMATCH);
-
- /* match within prefix */
- if (sequence.offset < 8) {
- /* close range match, overlap */
- static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
- static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
- int const sub2 = dec64table[sequence.offset];
- op[0] = match[0];
- op[1] = match[1];
- op[2] = match[2];
- op[3] = match[3];
- match += dec32table[sequence.offset];
- ZSTD_copy4(op+4, match);
- match -= sub2;
- } else {
- ZSTD_copy8(op, match);
- }
- op += 8; match += 8;
-
- if (oMatchEnd > oend-(16-MINMATCH)) {
- if (op < oend_w) {
- ZSTD_wildcopy(op, match, oend_w - op);
- match += oend_w - op;
- op = oend_w;
- }
- while (op < oMatchEnd) *op++ = *match++;
- } else {
- ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
+ /* If the match length is > 8 bytes, then continue with the wildcopy. */
+ if (sequence.matchLength > 8) {
+ assert(op < oMatchEnd);
+ ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
}
return sequenceLength;
}
@@ -798,6 +813,14 @@ ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
DStatePtr->state = DInfo.nextState + lowBits;
}
+FORCE_INLINE_TEMPLATE void
+ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD_seqSymbol const DInfo)
+{
+ U32 const nbBits = DInfo.nbBits;
+ size_t const lowBits = BIT_readBits(bitD, nbBits);
+ DStatePtr->state = DInfo.nextState + lowBits;
+}
+
/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
* offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
* bits before reloading. This value is the maximum number of bytes we read
@@ -809,25 +832,26 @@ ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
: 0)
typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
+typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e;
-#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
FORCE_INLINE_TEMPLATE seq_t
-ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
+ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const ZSTD_prefetch_e prefetch)
{
seq_t seq;
- U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits;
- U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits;
- U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits;
- U32 const totalBits = llBits+mlBits+ofBits;
- U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue;
- U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue;
- U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue;
+ ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state];
+ ZSTD_seqSymbol const mlDInfo = seqState->stateML.table[seqState->stateML.state];
+ ZSTD_seqSymbol const ofDInfo = seqState->stateOffb.table[seqState->stateOffb.state];
+ U32 const llBase = llDInfo.baseValue;
+ U32 const mlBase = mlDInfo.baseValue;
+ U32 const ofBase = ofDInfo.baseValue;
+ BYTE const llBits = llDInfo.nbAdditionalBits;
+ BYTE const mlBits = mlDInfo.nbAdditionalBits;
+ BYTE const ofBits = ofDInfo.nbAdditionalBits;
+ BYTE const totalBits = llBits+mlBits+ofBits;
/* sequence */
{ size_t offset;
- if (!ofBits)
- offset = 0;
- else {
+ if (ofBits > 1) {
ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
assert(ofBits <= MaxOff);
@@ -841,58 +865,138 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
}
- }
-
- if (ofBits <= 1) {
- offset += (llBase==0);
- if (offset) {
- size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
- temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
- if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
- seqState->prevOffset[1] = seqState->prevOffset[0];
- seqState->prevOffset[0] = offset = temp;
- } else { /* offset == 0 */
- offset = seqState->prevOffset[0];
- }
- } else {
seqState->prevOffset[2] = seqState->prevOffset[1];
seqState->prevOffset[1] = seqState->prevOffset[0];
seqState->prevOffset[0] = offset;
- }
+ } else {
+ U32 const ll0 = (llBase == 0);
+ if (LIKELY((ofBits == 0))) {
+ if (LIKELY(!ll0))
+ offset = seqState->prevOffset[0];
+ else {
+ offset = seqState->prevOffset[1];
+ seqState->prevOffset[1] = seqState->prevOffset[0];
+ seqState->prevOffset[0] = offset;
+ }
+ } else {
+ offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
+ { size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
+ temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
+ if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
+ seqState->prevOffset[1] = seqState->prevOffset[0];
+ seqState->prevOffset[0] = offset = temp;
+ } } }
seq.offset = offset;
}
- seq.matchLength = mlBase
- + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/) : 0); /* <= 16 bits */
+ seq.matchLength = mlBase;
+ if (mlBits > 0)
+ seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
+
if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
BIT_reloadDStream(&seqState->DStream);
- if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
+ if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
BIT_reloadDStream(&seqState->DStream);
/* Ensure there are enough bits to read the rest of data in 64-bit mode. */
ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
- seq.litLength = llBase
- + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits/*>0*/) : 0); /* <= 16 bits */
+ seq.litLength = llBase;
+ if (llBits > 0)
+ seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
+
if (MEM_32bits())
BIT_reloadDStream(&seqState->DStream);
DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
(U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
- /* ANS state update */
- ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
- ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
- if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
- ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
+ if (prefetch == ZSTD_p_prefetch) {
+ size_t const pos = seqState->pos + seq.litLength;
+ const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
+ seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
+ * No consequence though : no memory access will occur, offset is only used for prefetching */
+ seqState->pos = pos + seq.matchLength;
+ }
+
+ /* ANS state update
+ * gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo().
+ * clang-9.2.0 does 7% worse with ZSTD_updateFseState().
+ * Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the
+ * better option, so it is the default for other compilers. But, if you
+ * measure that it is worse, please put up a pull request.
+ */
+ {
+#if defined(__GNUC__) && !defined(__clang__)
+ const int kUseUpdateFseState = 1;
+#else
+ const int kUseUpdateFseState = 0;
+#endif
+ if (kUseUpdateFseState) {
+ ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
+ ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
+ if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
+ ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
+ } else {
+ ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llDInfo); /* <= 9 bits */
+ ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlDInfo); /* <= 9 bits */
+ if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
+ ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofDInfo); /* <= 8 bits */
+ }
+ }
return seq;
}
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+static int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
+{
+ size_t const windowSize = dctx->fParams.windowSize;
+ /* No dictionary used. */
+ if (dctx->dictContentEndForFuzzing == NULL) return 0;
+ /* Dictionary is our prefix. */
+ if (prefixStart == dctx->dictContentBeginForFuzzing) return 1;
+ /* Dictionary is not our ext-dict. */
+ if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0;
+ /* Dictionary is not within our window size. */
+ if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0;
+ /* Dictionary is active. */
+ return 1;
+}
+
+MEM_STATIC void ZSTD_assertValidSequence(
+ ZSTD_DCtx const* dctx,
+ BYTE const* op, BYTE const* oend,
+ seq_t const seq,
+ BYTE const* prefixStart, BYTE const* virtualStart)
+{
+ size_t const windowSize = dctx->fParams.windowSize;
+ size_t const sequenceSize = seq.litLength + seq.matchLength;
+ BYTE const* const oLitEnd = op + seq.litLength;
+ DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
+ (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
+ assert(op <= oend);
+ assert((size_t)(oend - op) >= sequenceSize);
+ assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX);
+ if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
+ size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
+ /* Offset must be within the dictionary. */
+ assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
+ assert(seq.offset <= windowSize + dictSize);
+ } else {
+ /* Offset must be within our window. */
+ assert(seq.offset <= windowSize);
+ }
+}
+#endif
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
FORCE_INLINE_TEMPLATE size_t
+DONT_VECTORIZE
ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq,
- const ZSTD_longOffset_e isLongOffset)
+ const ZSTD_longOffset_e isLongOffset,
+ const int frame)
{
const BYTE* ip = (const BYTE*)seqStart;
const BYTE* const iend = ip + seqSize;
@@ -905,40 +1009,104 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
DEBUGLOG(5, "ZSTD_decompressSequences_body");
+ (void)frame;
/* Regen sequences */
if (nbSeq) {
seqState_t seqState;
+ size_t error = 0;
dctx->fseEntropy = 1;
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
RETURN_ERROR_IF(
ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
- corruption_detected);
+ corruption_detected, "");
ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
-
- for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) {
- nbSeq--;
- { seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
- size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
- DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
- if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
- op += oneSeqSize;
- } }
+ assert(dst != NULL);
+
+ ZSTD_STATIC_ASSERT(
+ BIT_DStream_unfinished < BIT_DStream_completed &&
+ BIT_DStream_endOfBuffer < BIT_DStream_completed &&
+ BIT_DStream_completed < BIT_DStream_overflow);
+
+#if defined(__GNUC__) && defined(__x86_64__)
+ /* Align the decompression loop to 32 + 16 bytes.
+ *
+ * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
+ * speed swings based on the alignment of the decompression loop. This
+ * performance swing is caused by parts of the decompression loop falling
+ * out of the DSB. The entire decompression loop should fit in the DSB,
+ * when it can't we get much worse performance. You can measure if you've
+ * hit the good case or the bad case with this perf command for some
+ * compressed file test.zst:
+ *
+ * perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
+ * -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
+ *
+ * If you see most cycles served out of the MITE you've hit the bad case.
+ * If you see most cycles served out of the DSB you've hit the good case.
+ * If it is pretty even then you may be in an okay case.
+ *
+ * I've been able to reproduce this issue on the following CPUs:
+ * - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
+ * Use Instruments->Counters to get DSB/MITE cycles.
+ * I never got performance swings, but I was able to
+ * go from the good case of mostly DSB to half of the
+ * cycles served from MITE.
+ * - Coffeelake: Intel i9-9900k
+ *
+ * I haven't been able to reproduce the instability or DSB misses on any
+ * of the following CPUS:
+ * - Haswell
+ * - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH
+ * - Skylake
+ *
+ * If you are seeing performance stability this script can help test.
+ * It tests on 4 commits in zstd where I saw performance change.
+ *
+ * https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
+ */
+ __asm__(".p2align 5");
+ __asm__("nop");
+ __asm__(".p2align 4");
+#endif
+ for ( ; ; ) {
+ seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_noPrefetch);
+ size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+ assert(!ZSTD_isError(oneSeqSize));
+ if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
+#endif
+ DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
+ BIT_reloadDStream(&(seqState.DStream));
+ /* gcc and clang both don't like early returns in this loop.
+ * gcc doesn't like early breaks either.
+ * Instead save an error and report it at the end.
+ * When there is an error, don't increment op, so we don't
+ * overwrite.
+ */
+ if (UNLIKELY(ZSTD_isError(oneSeqSize))) error = oneSeqSize;
+ else op += oneSeqSize;
+ if (UNLIKELY(!--nbSeq)) break;
+ }
/* check if reached exact end */
DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
- RETURN_ERROR_IF(nbSeq, corruption_detected);
+ if (ZSTD_isError(error)) return error;
+ RETURN_ERROR_IF(nbSeq, corruption_detected, "");
+ RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
/* save reps for next block */
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
}
/* last literal segment */
{ size_t const lastLLSize = litEnd - litPtr;
- RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall);
- memcpy(op, litPtr, lastLLSize);
- op += lastLLSize;
+ RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
+ if (op != NULL) {
+ memcpy(op, litPtr, lastLLSize);
+ op += lastLLSize;
+ }
}
return op-ostart;
@@ -948,99 +1116,21 @@ static size_t
ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq,
- const ZSTD_longOffset_e isLongOffset)
+ const ZSTD_longOffset_e isLongOffset,
+ const int frame)
{
- return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+ return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
}
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
-
-
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
-FORCE_INLINE_TEMPLATE seq_t
-ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const longOffsets)
-{
- seq_t seq;
- U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits;
- U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits;
- U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits;
- U32 const totalBits = llBits+mlBits+ofBits;
- U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue;
- U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue;
- U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue;
-
- /* sequence */
- { size_t offset;
- if (!ofBits)
- offset = 0;
- else {
- ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
- ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
- assert(ofBits <= MaxOff);
- if (MEM_32bits() && longOffsets) {
- U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1);
- offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
- if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream);
- if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
- } else {
- offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
- if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
- }
- }
-
- if (ofBits <= 1) {
- offset += (llBase==0);
- if (offset) {
- size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
- temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
- if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
- seqState->prevOffset[1] = seqState->prevOffset[0];
- seqState->prevOffset[0] = offset = temp;
- } else {
- offset = seqState->prevOffset[0];
- }
- } else {
- seqState->prevOffset[2] = seqState->prevOffset[1];
- seqState->prevOffset[1] = seqState->prevOffset[0];
- seqState->prevOffset[0] = offset;
- }
- seq.offset = offset;
- }
-
- seq.matchLength = mlBase + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */
- if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
- BIT_reloadDStream(&seqState->DStream);
- if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
- BIT_reloadDStream(&seqState->DStream);
- /* Verify that there is enough bits to read the rest of the data in 64-bit mode. */
- ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
-
- seq.litLength = llBase + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */
- if (MEM_32bits())
- BIT_reloadDStream(&seqState->DStream);
-
- { size_t const pos = seqState->pos + seq.litLength;
- const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
- seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
- * No consequence though : no memory access will occur, overly large offset will be detected in ZSTD_execSequenceLong() */
- seqState->pos = pos + seq.matchLength;
- }
-
- /* ANS state update */
- ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
- ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
- if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
- ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
-
- return seq;
-}
-
FORCE_INLINE_TEMPLATE size_t
ZSTD_decompressSequencesLong_body(
ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq,
- const ZSTD_longOffset_e isLongOffset)
+ const ZSTD_longOffset_e isLongOffset,
+ const int frame)
{
const BYTE* ip = (const BYTE*)seqStart;
const BYTE* const iend = ip + seqSize;
@@ -1052,6 +1142,7 @@ ZSTD_decompressSequencesLong_body(
const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
+ (void)frame;
/* Regen sequences */
if (nbSeq) {
@@ -1067,36 +1158,45 @@ ZSTD_decompressSequencesLong_body(
seqState.prefixStart = prefixStart;
seqState.pos = (size_t)(op-prefixStart);
seqState.dictEnd = dictEnd;
+ assert(dst != NULL);
assert(iend >= ip);
RETURN_ERROR_IF(
ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
- corruption_detected);
+ corruption_detected, "");
ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
/* prepare in advance */
for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
- sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
+ sequences[seqNb] = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
PREFETCH_L1(sequences[seqNb].match); PREFETCH_L1(sequences[seqNb].match + sequences[seqNb].matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
}
- RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected);
+ RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
/* decode and decompress */
for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
- seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
- size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+ seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
+ size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+ assert(!ZSTD_isError(oneSeqSize));
+ if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
+#endif
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
sequences[seqNb & STORED_SEQS_MASK] = sequence;
op += oneSeqSize;
}
- RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected);
+ RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");
/* finish queue */
seqNb -= seqAdvance;
for ( ; seqNb<nbSeq ; seqNb++) {
- size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+ size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+ assert(!ZSTD_isError(oneSeqSize));
+ if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
+#endif
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
op += oneSeqSize;
}
@@ -1107,9 +1207,11 @@ ZSTD_decompressSequencesLong_body(
/* last literal segment */
{ size_t const lastLLSize = litEnd - litPtr;
- RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall);
- memcpy(op, litPtr, lastLLSize);
- op += lastLLSize;
+ RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
+ if (op != NULL) {
+ memcpy(op, litPtr, lastLLSize);
+ op += lastLLSize;
+ }
}
return op-ostart;
@@ -1119,9 +1221,10 @@ static size_t
ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq,
- const ZSTD_longOffset_e isLongOffset)
+ const ZSTD_longOffset_e isLongOffset,
+ const int frame)
{
- return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+ return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
}
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
@@ -1131,12 +1234,14 @@ ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
static TARGET_ATTRIBUTE("bmi2") size_t
+DONT_VECTORIZE
ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq,
- const ZSTD_longOffset_e isLongOffset)
+ const ZSTD_longOffset_e isLongOffset,
+ const int frame)
{
- return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+ return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
}
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
@@ -1145,9 +1250,10 @@ static TARGET_ATTRIBUTE("bmi2") size_t
ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq,
- const ZSTD_longOffset_e isLongOffset)
+ const ZSTD_longOffset_e isLongOffset,
+ const int frame)
{
- return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+ return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
}
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
@@ -1157,21 +1263,23 @@ typedef size_t (*ZSTD_decompressSequences_t)(
ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq,
- const ZSTD_longOffset_e isLongOffset);
+ const ZSTD_longOffset_e isLongOffset,
+ const int frame);
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
static size_t
ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq,
- const ZSTD_longOffset_e isLongOffset)
+ const ZSTD_longOffset_e isLongOffset,
+ const int frame)
{
DEBUGLOG(5, "ZSTD_decompressSequences");
#if DYNAMIC_BMI2
if (dctx->bmi2) {
- return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+ return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
}
#endif
- return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+ return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
}
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
@@ -1186,15 +1294,16 @@ static size_t
ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq,
- const ZSTD_longOffset_e isLongOffset)
+ const ZSTD_longOffset_e isLongOffset,
+ const int frame)
{
DEBUGLOG(5, "ZSTD_decompressSequencesLong");
#if DYNAMIC_BMI2
if (dctx->bmi2) {
- return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+ return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
}
#endif
- return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+ return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
}
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
@@ -1228,7 +1337,6 @@ ZSTD_getLongOffsetsSha