Commit 83265b3e by lhchavez

zlib: Add support for building with Chromium's zlib implementation

This change builds libgit2 using Chromium's zlib implementation by
invoking cmake with `-DUSE_BUNDLED_ZLIB=ON -DUSE_CHROMIUM_ZLIB=ON`,
which is ~10% faster than the bundled zlib for the core::zstream suite.

This version of zlib has some optimizations:

a) Decompression (Intel+ARM): inflate_fast, adler32, crc32, etc.
b) Compression (Intel): fill_window, longest_match, hash function, etc.

Due to the introduction of SIMD optimizations, and to get the maximum
performance out of this fork of zlib, this requires an x86_64 processor
with SSE4.2 and CLMUL (anything Westmere or later, ~2010). The Chromium
zlib implementation also supports ARM with NEON, but it has not been
enabled in this patch.

Performance
===========

TL;DR: Running just `./libgit2_clar -score::zstream` 100 times in a loop
took 0:56.30 before and 0:50.67 after (~10% reduction!).

The bundled and system zlib implementations on an Ubuntu Focal system
perform relatively similarly (the bundled one is marginally better due to
the compiler being able to inline some functions), so only the bundled
and Chromium zlibs were compared.

For a more balanced comparison (to ensure that nothing regressed
overall), `libgit2_clar` under `perf` was also run, and the zlib-related
functions were compared.

Bundled
-------

```shell
cmake \
  -DUSE_BUNDLED_ZLIB=ON \
  -DUSE_CHROMIUM_ZLIB=OFF \
  -DCMAKE_BUILD_TYPE="RelWithDebInfo" \
  -DCMAKE_C_FLAGS="-fPIC -fno-omit-frame-pointer" \
  -GNinja \
  ..
ninja
perf record --call-graph=dwarf ./libgit2_clar
perf report --children
```

```
Samples: 87K of event 'cycles', Event count (approx.): 75923450603
  Children      Self  Command       Shared Objec  Symbol
+    4.14%     0.01%  libgit2_clar  libgit2_clar  [.] git_zstream_get_output_chunk
+    2.91%     0.00%  libgit2_clar  libgit2_clar  [.] git_zstream_get_output
+    0.69%     0.00%  libgit2_clar  libgit2_clar  [.] git_zstream_get_output (inlined)
     0.17%     0.00%  libgit2_clar  libgit2_clar  [.] git_zstream_init
     0.02%     0.00%  libgit2_clar  libgit2_clar  [.] git_zstream_reset
     0.00%     0.00%  libgit2_clar  libgit2_clar  [.] git_zstream_eos
     0.00%     0.00%  libgit2_clar  libgit2_clar  [.] git_zstream_done
     0.00%     0.00%  libgit2_clar  libgit2_clar  [.] git_zstream_free (inlined)

Samples: 87K of event 'cycles', Event count (approx.): 75923450603
  Children      Self  Command       Shared Objec  Symbol
+    3.12%     0.01%  libgit2_clar  libgit2_clar  [.] deflate
+    2.65%     1.48%  libgit2_clar  libgit2_clar  [.] deflate_slow
+    1.60%     0.55%  libgit2_clar  libgit2_clar  [.] inflate
+    0.53%     0.00%  libgit2_clar  libgit2_clar  [.] write_deflate
     0.49%     0.36%  libgit2_clar  libgit2_clar  [.] inflate_fast
     0.46%     0.02%  libgit2_clar  libgit2_clar  [.] deflate_fast
     0.19%     0.19%  libgit2_clar  libgit2_clar  [.] inflate_table
     0.16%     0.01%  libgit2_clar  libgit2_clar  [.] inflateInit_
     0.15%     0.00%  libgit2_clar  libgit2_clar  [.] inflateInit2_ (inlined)
     0.10%     0.00%  libgit2_clar  libgit2_clar  [.] deflateInit_
     0.10%     0.00%  libgit2_clar  libgit2_clar  [.] deflateInit2_
     0.03%     0.00%  libgit2_clar  libgit2_clar  [.] deflateReset (inlined)
     0.02%     0.00%  libgit2_clar  libgit2_clar  [.] deflateReset
     0.02%     0.00%  libgit2_clar  libgit2_clar  [.] inflateEnd
     0.02%     0.00%  libgit2_clar  libgit2_clar  [.] deflateEnd
     0.01%     0.00%  libgit2_clar  libgit2_clar  [.] deflateResetKeep
     0.01%     0.01%  libgit2_clar  libgit2_clar  [.] inflateReset2
     0.01%     0.00%  libgit2_clar  libgit2_clar  [.] deflateReset (inlined)
     0.00%     0.00%  libgit2_clar  libgit2_clar  [.] inflateStateCheck (inlined)
     0.00%     0.00%  libgit2_clar  libgit2_clar  [.] inflateReset (inlined)
     0.00%     0.00%  libgit2_clar  libgit2_clar  [.] inflateStateCheck (inlined)
     0.00%     0.00%  libgit2_clar  libgit2_clar  [.] deflateStateCheck (inlined)
     0.00%     0.00%  libgit2_clar  libgit2_clar  [.] inflateResetKeep (inlined)
```

Chromium
--------

```shell
cmake \
  -DUSE_BUNDLED_ZLIB=ON \
  -DUSE_CHROMIUM_ZLIB=ON \
  -DCMAKE_BUILD_TYPE="RelWithDebInfo" \
  -DCMAKE_C_FLAGS="-fPIC -fno-omit-frame-pointer" \
  -GNinja \
  ..
ninja
perf record --call-graph=dwarf ./libgit2_clar
perf report --children
```

```
Samples: 97K of event 'cycles', Event count (approx.): 80862210917
  Children      Self  Command       Shared Objec  Symbol
+    3.31%     0.00%  libgit2_clar  libgit2_clar  [.] git_zstream_get_output_chunk
+    2.27%     0.01%  libgit2_clar  libgit2_clar  [.] git_zstream_get_output
+    0.55%     0.00%  libgit2_clar  libgit2_clar  [.] git_zstream_get_output (inlined)
     0.18%     0.00%  libgit2_clar  libgit2_clar  [.] git_zstream_init
     0.02%     0.00%  libgit2_clar  libgit2_clar  [.] git_zstream_reset
     0.00%     0.00%  libgit2_clar  libgit2_clar  [.] git_zstream_free (inlined)
     0.00%     0.00%  libgit2_clar  libgit2_clar  [.] git_zstream_done
     0.00%     0.00%  libgit2_clar  libgit2_clar  [.] git_zstream_free

Samples: 97K of event 'cycles', Event count (approx.): 80862210917
  Children      Self  Command       Shared Objec  Symbol
+    2.55%     0.01%  libgit2_clar  libgit2_clar  [.] deflate
+    2.25%     1.41%  libgit2_clar  libgit2_clar  [.] deflate_slow
+    1.10%     0.52%  libgit2_clar  libgit2_clar  [.] inflate
     0.36%     0.00%  libgit2_clar  libgit2_clar  [.] write_deflate
     0.30%     0.03%  libgit2_clar  libgit2_clar  [.] deflate_fast
     0.28%     0.15%  libgit2_clar  libgit2_clar  [.] inflate_fast_chunk_
     0.19%     0.19%  libgit2_clar  libgit2_clar  [.] inflate_table
     0.17%     0.01%  libgit2_clar  libgit2_clar  [.] inflateInit_
     0.16%     0.00%  libgit2_clar  libgit2_clar  [.] inflateInit2_ (inlined)
     0.15%     0.00%  libgit2_clar  libgit2_clar  [.] deflateInit_
     0.15%     0.00%  libgit2_clar  libgit2_clar  [.] deflateInit2_
     0.11%     0.01%  libgit2_clar  libgit2_clar  [.] adler32_z
     0.09%     0.09%  libgit2_clar  libgit2_clar  [.] adler32_simd_
     0.05%     0.00%  libgit2_clar  libgit2_clar  [.] deflateReset (inlined)
     0.05%     0.00%  libgit2_clar  libgit2_clar  [.] deflate_read_buf
     0.03%     0.00%  libgit2_clar  libgit2_clar  [.] inflateEnd
     0.02%     0.00%  libgit2_clar  libgit2_clar  [.] deflateReset
     0.01%     0.00%  libgit2_clar  libgit2_clar  [.] deflateEnd
     0.01%     0.01%  libgit2_clar  libgit2_clar  [.] inflateReset2
     0.01%     0.00%  libgit2_clar  libgit2_clar  [.] inflateReset (inlined)
     0.00%     0.00%  libgit2_clar  libgit2_clar  [.] adler32
     0.00%     0.00%  libgit2_clar  libgit2_clar  [.] inflateResetKeep (inlined)
     0.00%     0.00%  libgit2_clar  libgit2_clar  [.] deflateResetKeep
     0.00%     0.00%  libgit2_clar  libgit2_clar  [.] inflateStateCheck (inlined)
     0.00%     0.00%  libgit2_clar  libgit2_clar  [.] inflateStateCheck (inlined)
     0.00%     0.00%  libgit2_clar  libgit2_clar  [.] inflateStateCheck (inlined)
     0.00%     0.00%  libgit2_clar  libgit2_clar  [.] deflateStateCheck (inlined)
```
parent fe41e582
......@@ -51,6 +51,7 @@ OPTION(USE_LEAK_CHECKER "Run tests with leak checker" OFF)
# Debugging / strictness switches.
option(DEBUG_POOL "Enable debug pool allocator" OFF)
option(ENABLE_WERROR "Enable compilation with -Werror" OFF)

# zlib selection. Per its help text, USE_CHROMIUM_ZLIB is only meaningful when
# the bundled zlib is being used.
option(USE_BUNDLED_ZLIB "Use the bundled version of zlib" OFF)
option(USE_CHROMIUM_ZLIB
	"If using the bundled version of zlib, use the Chromium flavor (x86_64 processor with SSE4.2 and CLMUL required)"
	OFF)

# String-valued cache entry: an empty value leaves the HTTP parser choice
# unspecified by the user.
set(USE_HTTP_PARSER
	""
	CACHE STRING "Specifies the HTTP Parser implementation; either system or builtin.")

option(DEPRECATE_HARD "Do not include deprecated functions in the library" OFF)

# String-valued cache entry: an empty value leaves the regex backend choice
# unspecified by the user.
set(REGEX_BACKEND
	""
	CACHE STRING "Regular expression implementation. One of regcomp_l, pcre2, pcre, regcomp, or builtin.")
......
# CMake build script for the bundled Chromium zlib implementation. So far, it
# is only supported for x86_64 processors with CLMUL, SSSE3, and SSE4.2.
#
# TODO: The Chromium build file (in deps/chromium-zlib/zlib/BUILD.gn) supports
# more platforms (like ARM with NEON), more can be enabled as needed.
#
# Produces a single OBJECT library target, `chromium_zlib`, whose objects are
# meant to be folded into the including project's library.
CMAKE_MINIMUM_REQUIRED(VERSION 3.11)

include(FetchContent)

# Ensure that the git binary is present to download the sources.
# find_package() locates and runs the FindGit module on its own, so the module
# is not include()d directly.
find_package(Git)
IF(NOT Git_FOUND)
	message(FATAL_ERROR "git is required to download the Chromium zlib sources")
ENDIF()

# Download the sources at configure time, pinned to an exact commit so the
# build is reproducible. On return, chromium_zlib_src_SOURCE_DIR points at the
# checked-out tree.
FetchContent_Populate(chromium_zlib_src
	GIT_REPOSITORY https://chromium.googlesource.com/chromium/src/third_party/zlib.git
	GIT_TAG 2c183c9f93a328bfb3121284da13cf89a0f7e64a
	QUIET
)

# The Chromium build globally disables some warnings.
# NOTE(review): disable_warnings() is not defined in this file; it is expected
# to be provided by the including project.
disable_warnings(implicit-fallthrough)
disable_warnings(unused-function)
disable_warnings(unused-parameter)
disable_warnings(sign-compare)
disable_warnings(declaration-after-statement)
disable_warnings(missing-declarations)

# -O3 is also set by the Chromium configuration and has been deemed safe enough
# for them. These assignments only affect this directory's scope.
SET(CMAKE_C_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
SET(CMAKE_C_FLAGS_RELEASE "-O3 -DNDEBUG")

# Common (architecture-independent) sources.
list(APPEND SRC_ZLIB
	"${chromium_zlib_src_SOURCE_DIR}/adler32.c"
	"${chromium_zlib_src_SOURCE_DIR}/chromeconf.h"
	"${chromium_zlib_src_SOURCE_DIR}/compress.c"
	"${chromium_zlib_src_SOURCE_DIR}/contrib/optimizations/insert_string.h"
	"${chromium_zlib_src_SOURCE_DIR}/cpu_features.c"
	"${chromium_zlib_src_SOURCE_DIR}/cpu_features.h"
	"${chromium_zlib_src_SOURCE_DIR}/crc32.c"
	"${chromium_zlib_src_SOURCE_DIR}/crc32.h"
	"${chromium_zlib_src_SOURCE_DIR}/deflate.c"
	"${chromium_zlib_src_SOURCE_DIR}/deflate.h"
	"${chromium_zlib_src_SOURCE_DIR}/gzclose.c"
	"${chromium_zlib_src_SOURCE_DIR}/gzguts.h"
	"${chromium_zlib_src_SOURCE_DIR}/gzlib.c"
	"${chromium_zlib_src_SOURCE_DIR}/gzread.c"
	"${chromium_zlib_src_SOURCE_DIR}/gzwrite.c"
	"${chromium_zlib_src_SOURCE_DIR}/infback.c"
	"${chromium_zlib_src_SOURCE_DIR}/inffast.c"
	"${chromium_zlib_src_SOURCE_DIR}/inffast.h"
	"${chromium_zlib_src_SOURCE_DIR}/inffixed.h"
	"${chromium_zlib_src_SOURCE_DIR}/inflate.h"
	"${chromium_zlib_src_SOURCE_DIR}/inftrees.c"
	"${chromium_zlib_src_SOURCE_DIR}/inftrees.h"
	"${chromium_zlib_src_SOURCE_DIR}/trees.c"
	"${chromium_zlib_src_SOURCE_DIR}/trees.h"
	"${chromium_zlib_src_SOURCE_DIR}/uncompr.c"
	"${chromium_zlib_src_SOURCE_DIR}/zconf.h"
	"${chromium_zlib_src_SOURCE_DIR}/zlib.h"
	"${chromium_zlib_src_SOURCE_DIR}/zutil.c"
	"${chromium_zlib_src_SOURCE_DIR}/zutil.h"
)

# x86_64-specific optimized sources. Note that inflate.c comes from
# contrib/optimizations here; the common list above only carries inflate.h.
list(APPEND SRC_ZLIB
	"${chromium_zlib_src_SOURCE_DIR}/adler32_simd.c"
	"${chromium_zlib_src_SOURCE_DIR}/adler32_simd.h"
	"${chromium_zlib_src_SOURCE_DIR}/contrib/optimizations/chunkcopy.h"
	"${chromium_zlib_src_SOURCE_DIR}/contrib/optimizations/inffast_chunk.c"
	"${chromium_zlib_src_SOURCE_DIR}/contrib/optimizations/inffast_chunk.h"
	"${chromium_zlib_src_SOURCE_DIR}/contrib/optimizations/inflate.c"
	"${chromium_zlib_src_SOURCE_DIR}/crc32_simd.c"
	"${chromium_zlib_src_SOURCE_DIR}/crc32_simd.h"
	"${chromium_zlib_src_SOURCE_DIR}/crc_folding.c"
	"${chromium_zlib_src_SOURCE_DIR}/fill_window_sse.c"
)
list(SORT SRC_ZLIB)

add_library(chromium_zlib OBJECT ${SRC_ZLIB})

# Everything below is attached to the target (PRIVATE) rather than to the
# directory, so the requirements are explicit and cannot leak to any other
# target that might be added to this directory later.
target_include_directories(chromium_zlib PRIVATE "${chromium_zlib_src_SOURCE_DIR}")

# Common definitions.
target_compile_definitions(chromium_zlib PRIVATE
	STDC
	NO_GZIP
	ZLIB_IMPLEMENTATION
)

# x86_64-specific optimizations: enable the instruction sets used by the SIMD
# sources, and the preprocessor switches that select those code paths.
target_compile_options(chromium_zlib PRIVATE
	-mssse3
	-msse4.2
	-mpclmul
)
target_compile_definitions(chromium_zlib PRIVATE
	CHROMIUM_ZLIB_NO_CHROMECONF
	X86_NOT_WINDOWS
	ADLER32_SIMD_SSSE3
	CRC32_SIMD_SSE42_PCLMUL
	DEFLATE_FILL_WINDOW_SSE2
	INFLATE_CHUNK_READ_64LE
	INFLATE_CHUNK_SIMD_SSE2
)
......@@ -202,10 +202,17 @@ IF(NOT USE_BUNDLED_ZLIB)
ENDIF()
ENDIF()
# Build zlib from sources shipped with (or fetched by) the project when that
# was explicitly requested, or when no usable system zlib was found.
IF(USE_BUNDLED_ZLIB OR NOT ZLIB_FOUND)
	IF(USE_CHROMIUM_ZLIB)
		ADD_SUBDIRECTORY("${libgit2_SOURCE_DIR}/deps/chromium-zlib" "${libgit2_BINARY_DIR}/deps/chromium-zlib")
		LIST(APPEND LIBGIT2_INCLUDES "${libgit2_SOURCE_DIR}/deps/chromium-zlib")
		# The Chromium zlib headers are downloaded at configure time into the
		# directory named by chromium_zlib_src_SOURCE_DIR, a variable that
		# only exists in the subdirectory's scope. Read it from there and
		# expose it as an include directory, so that zlib.h resolves to the
		# header matching the objects linked below.
		GET_DIRECTORY_PROPERTY(CHROMIUM_ZLIB_INCLUDE_DIR
			DIRECTORY "${libgit2_SOURCE_DIR}/deps/chromium-zlib"
			DEFINITION chromium_zlib_src_SOURCE_DIR)
		IF(CHROMIUM_ZLIB_INCLUDE_DIR)
			LIST(APPEND LIBGIT2_INCLUDES "${CHROMIUM_ZLIB_INCLUDE_DIR}")
		ENDIF()
		LIST(APPEND LIBGIT2_OBJECTS $<TARGET_OBJECTS:chromium_zlib>)
		ADD_FEATURE_INFO(zlib ON "using (Chromium) bundled zlib")
	ELSE()
		# Classic bundled zlib: its headers live directly in deps/zlib.
		ADD_SUBDIRECTORY("${libgit2_SOURCE_DIR}/deps/zlib" "${libgit2_BINARY_DIR}/deps/zlib")
		LIST(APPEND LIBGIT2_INCLUDES "${libgit2_SOURCE_DIR}/deps/zlib")
		LIST(APPEND LIBGIT2_OBJECTS $<TARGET_OBJECTS:zlib>)
		ADD_FEATURE_INFO(zlib ON "using bundled zlib")
	ENDIF()
ENDIF()
# Optional external dependency: libssh2
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment