Version in base suite: 1.0.0-2
Base version: libclamunrar_1.0.0-2
Target version: libclamunrar_1.0.3-1~deb12u1
Base file: /srv/ftp-master.debian.org/ftp/pool/non-free/libc/libclamunrar/libclamunrar_1.0.0-2.dsc
Target file: /srv/ftp-master.debian.org/policy/pool/non-free/libc/libclamunrar/libclamunrar_1.0.3-1~deb12u1.dsc

 .gitattributes | 3
 .gitignore | 4
 CMakeLists.txt | 2
 Cargo.lock | 294
 Jenkinsfile | 4
 NEWS.md | 165
 clamsubmit/CMakeLists.txt | 2
 cmake/FindRust.cmake | 26
 debian/.git-dpm | 14
 debian/changelog | 11
 debian/control | 2
 debian/copyright | 1
 debian/patches/Add-a-version-script-for-libclamunrar-and-.patch | 48
 debian/patches/Add-an-option-to-avoid-setting-RPATH-on-unix-systems.patch | 6
 debian/patches/Use-either-system-s-tomfastmath-library-or-the-built.patch | 12
 debian/patches/cargo-Remove-windows-referenfes.patch | 47
 debian/patches/series | 2
 debian/upstream/signing-key.asc | 106
 libclamav/autoit.c | 14
 libclamav/bytecode_api.h | 5
 libclamav/dmg.c | 3
 libclamav/hfsplus.c | 20
 libclamav/matcher-ac.c | 1
 libclamav/matcher-bm.c | 1
 libclamav/others.h | 2
 libclamav/readdb.c | 8
 libclamav/rtf.c | 6
 libclamav/scanners.c | 123
 libclamav/vba_extract.c | 14
 libclamav/vba_extract.h | 2
 libclamav/xlm_extract.c | 3
 libclamav_rust/.cargo/vendor/aho-corasick/.cargo-checksum.json | 1
 libclamav_rust/.cargo/vendor/aho-corasick/COPYING | 3
 libclamav_rust/.cargo/vendor/aho-corasick/Cargo.toml | 50
 libclamav_rust/.cargo/vendor/aho-corasick/DESIGN.md | 483
 libclamav_rust/.cargo/vendor/aho-corasick/LICENSE-MIT | 21
 libclamav_rust/.cargo/vendor/aho-corasick/README.md | 187
 libclamav_rust/.cargo/vendor/aho-corasick/UNLICENSE | 24
 libclamav_rust/.cargo/vendor/aho-corasick/rustfmt.toml | 2
 libclamav_rust/.cargo/vendor/aho-corasick/src/ahocorasick.rs | 2141 ---
 libclamav_rust/.cargo/vendor/aho-corasick/src/automaton.rs | 573 -
 libclamav_rust/.cargo/vendor/aho-corasick/src/buffer.rs | 132
 libclamav_rust/.cargo/vendor/aho-corasick/src/byte_frequencies.rs | 258
 libclamav_rust/.cargo/vendor/aho-corasick/src/classes.rs | 238
 libclamav_rust/.cargo/vendor/aho-corasick/src/dfa.rs | 713 -
 libclamav_rust/.cargo/vendor/aho-corasick/src/error.rs | 101
 libclamav_rust/.cargo/vendor/aho-corasick/src/lib.rs | 303
 libclamav_rust/.cargo/vendor/aho-corasick/src/nfa.rs | 1214 --
 libclamav_rust/.cargo/vendor/aho-corasick/src/packed/api.rs | 625 -
 libclamav_rust/.cargo/vendor/aho-corasick/src/packed/mod.rs | 117
 libclamav_rust/.cargo/vendor/aho-corasick/src/packed/pattern.rs | 318
 libclamav_rust/.cargo/vendor/aho-corasick/src/packed/rabinkarp.rs | 185
 libclamav_rust/.cargo/vendor/aho-corasick/src/packed/teddy/README.md | 386
 libclamav_rust/.cargo/vendor/aho-corasick/src/packed/teddy/compile.rs | 414
 libclamav_rust/.cargo/vendor/aho-corasick/src/packed/teddy/mod.rs | 62
 libclamav_rust/.cargo/vendor/aho-corasick/src/packed/teddy/runtime.rs | 1204 --
 libclamav_rust/.cargo/vendor/aho-corasick/src/packed/tests.rs | 568 -
 libclamav_rust/.cargo/vendor/aho-corasick/src/packed/vector.rs | 181
 libclamav_rust/.cargo/vendor/aho-corasick/src/prefilter.rs | 1057 -
 libclamav_rust/.cargo/vendor/aho-corasick/src/state_id.rs | 192
 libclamav_rust/.cargo/vendor/aho-corasick/src/tests.rs | 1254 --
 libclamav_rust/.cargo/vendor/bindgen/.cargo-checksum.json | 2
 libclamav_rust/.cargo/vendor/bindgen/Cargo.lock | 446
 libclamav_rust/.cargo/vendor/bindgen/Cargo.toml | 89
 libclamav_rust/.cargo/vendor/bindgen/README.md | 83
 libclamav_rust/.cargo/vendor/bindgen/build.rs | 83
 libclamav_rust/.cargo/vendor/bindgen/callbacks.rs | 178
 libclamav_rust/.cargo/vendor/bindgen/clang.rs | 2236 +++
 libclamav_rust/.cargo/vendor/bindgen/codegen/bitfield_unit.rs | 102
 libclamav_rust/.cargo/vendor/bindgen/codegen/bitfield_unit_tests.rs | 260
 libclamav_rust/.cargo/vendor/bindgen/codegen/dyngen.rs | 201
 libclamav_rust/.cargo/vendor/bindgen/codegen/error.rs | 33
 libclamav_rust/.cargo/vendor/bindgen/codegen/helpers.rs | 322
 libclamav_rust/.cargo/vendor/bindgen/codegen/impl_debug.rs | 245
 libclamav_rust/.cargo/vendor/bindgen/codegen/impl_partialeq.rs | 142
 libclamav_rust/.cargo/vendor/bindgen/codegen/mod.rs | 5366 +++++++++
 libclamav_rust/.cargo/vendor/bindgen/codegen/postprocessing/merge_extern_blocks.rs | 72
 libclamav_rust/.cargo/vendor/bindgen/codegen/postprocessing/mod.rs | 57
 libclamav_rust/.cargo/vendor/bindgen/codegen/postprocessing/sort_semantically.rs | 46
 libclamav_rust/.cargo/vendor/bindgen/codegen/serialize.rs | 358
 libclamav_rust/.cargo/vendor/bindgen/codegen/struct_layout.rs | 444
 libclamav_rust/.cargo/vendor/bindgen/csmith-fuzzing/README.md | 65
 libclamav_rust/.cargo/vendor/bindgen/deps.rs | 20
 libclamav_rust/.cargo/vendor/bindgen/diagnostics.rs | 189
 libclamav_rust/.cargo/vendor/bindgen/extra_assertions.rs | 34
 libclamav_rust/.cargo/vendor/bindgen/features.rs | 323
 libclamav_rust/.cargo/vendor/bindgen/ir/analysis/derive.rs | 732 +
 libclamav_rust/.cargo/vendor/bindgen/ir/analysis/has_destructor.rs | 176
 libclamav_rust/.cargo/vendor/bindgen/ir/analysis/has_float.rs | 252
 libclamav_rust/.cargo/vendor/bindgen/ir/analysis/has_type_param_in_array.rs | 252
 libclamav_rust/.cargo/vendor/bindgen/ir/analysis/has_vtable.rs | 240
 libclamav_rust/.cargo/vendor/bindgen/ir/analysis/mod.rs | 407
 libclamav_rust/.cargo/vendor/bindgen/ir/analysis/sizedness.rs | 361
 libclamav_rust/.cargo/vendor/bindgen/ir/analysis/template_params.rs | 607 +
 libclamav_rust/.cargo/vendor/bindgen/ir/annotations.rs | 256
 libclamav_rust/.cargo/vendor/bindgen/ir/comment.rs | 100
 libclamav_rust/.cargo/vendor/bindgen/ir/comp.rs | 1875 +++
 libclamav_rust/.cargo/vendor/bindgen/ir/context.rs | 2981 +++++
 libclamav_rust/.cargo/vendor/bindgen/ir/derive.rs | 135
 libclamav_rust/.cargo/vendor/bindgen/ir/dot.rs | 86
 libclamav_rust/.cargo/vendor/bindgen/ir/enum_ty.rs | 323
 libclamav_rust/.cargo/vendor/bindgen/ir/function.rs | 787 +
 libclamav_rust/.cargo/vendor/bindgen/ir/int.rs | 127
 libclamav_rust/.cargo/vendor/bindgen/ir/item.rs | 2026 +++
 libclamav_rust/.cargo/vendor/bindgen/ir/item_kind.rs | 135
 libclamav_rust/.cargo/vendor/bindgen/ir/layout.rs | 136
 libclamav_rust/.cargo/vendor/bindgen/ir/mod.rs | 25
 libclamav_rust/.cargo/vendor/bindgen/ir/module.rs | 95
 libclamav_rust/.cargo/vendor/bindgen/ir/objc.rs | 335
 libclamav_rust/.cargo/vendor/bindgen/ir/template.rs | 342
 libclamav_rust/.cargo/vendor/bindgen/ir/traversal.rs | 479
 libclamav_rust/.cargo/vendor/bindgen/ir/ty.rs | 1273 ++
 libclamav_rust/.cargo/vendor/bindgen/ir/var.rs | 488
 libclamav_rust/.cargo/vendor/bindgen/lib.rs | 1300 ++
 libclamav_rust/.cargo/vendor/bindgen/log_stubs.rs | 32
 libclamav_rust/.cargo/vendor/bindgen/options/as_args.rs | 52
 libclamav_rust/.cargo/vendor/bindgen/options/helpers.rs | 43
 libclamav_rust/.cargo/vendor/bindgen/options/mod.rs | 2008 +++
 libclamav_rust/.cargo/vendor/bindgen/parse.rs | 41
 libclamav_rust/.cargo/vendor/bindgen/regex_set.rs | 204
 libclamav_rust/.cargo/vendor/bindgen/src/callbacks.rs | 106
 libclamav_rust/.cargo/vendor/bindgen/src/clang.rs | 2093 ---
 libclamav_rust/.cargo/vendor/bindgen/src/codegen/bitfield_unit.rs | 102
 libclamav_rust/.cargo/vendor/bindgen/src/codegen/bitfield_unit_tests.rs | 260
 libclamav_rust/.cargo/vendor/bindgen/src/codegen/dyngen.rs | 178
 libclamav_rust/.cargo/vendor/bindgen/src/codegen/error.rs | 33
 libclamav_rust/.cargo/vendor/bindgen/src/codegen/helpers.rs | 299
 libclamav_rust/.cargo/vendor/bindgen/src/codegen/impl_debug.rs | 245
 libclamav_rust/.cargo/vendor/bindgen/src/codegen/impl_partialeq.rs | 142
 libclamav_rust/.cargo/vendor/bindgen/src/codegen/mod.rs | 4835 --------
 libclamav_rust/.cargo/vendor/bindgen/src/codegen/struct_layout.rs | 438
 libclamav_rust/.cargo/vendor/bindgen/src/deps.rs | 20
 libclamav_rust/.cargo/vendor/bindgen/src/extra_assertions.rs | 34
 libclamav_rust/.cargo/vendor/bindgen/src/features.rs | 302
 libclamav_rust/.cargo/vendor/bindgen/src/ir/analysis/derive.rs | 732 -
 libclamav_rust/.cargo/vendor/bindgen/src/ir/analysis/has_destructor.rs | 176
 libclamav_rust/.cargo/vendor/bindgen/src/ir/analysis/has_float.rs | 252
 libclamav_rust/.cargo/vendor/bindgen/src/ir/analysis/has_type_param_in_array.rs | 252
 libclamav_rust/.cargo/vendor/bindgen/src/ir/analysis/has_vtable.rs | 240
 libclamav_rust/.cargo/vendor/bindgen/src/ir/analysis/mod.rs | 398
 libclamav_rust/.cargo/vendor/bindgen/src/ir/analysis/sizedness.rs | 361
 libclamav_rust/.cargo/vendor/bindgen/src/ir/analysis/template_params.rs | 608 -
 libclamav_rust/.cargo/vendor/bindgen/src/ir/annotations.rs | 211
 libclamav_rust/.cargo/vendor/bindgen/src/ir/comment.rs | 119
 libclamav_rust/.cargo/vendor/bindgen/src/ir/comp.rs | 1854 ---
 libclamav_rust/.cargo/vendor/bindgen/src/ir/context.rs | 2835 -----
 libclamav_rust/.cargo/vendor/bindgen/src/ir/derive.rs | 135
 libclamav_rust/.cargo/vendor/bindgen/src/ir/dot.rs | 86
 libclamav_rust/.cargo/vendor/bindgen/src/ir/enum_ty.rs | 305
 libclamav_rust/.cargo/vendor/bindgen/src/ir/function.rs | 652 -
 libclamav_rust/.cargo/vendor/bindgen/src/ir/int.rs | 127
 libclamav_rust/.cargo/vendor/bindgen/src/ir/item.rs | 2008 ---
 libclamav_rust/.cargo/vendor/bindgen/src/ir/item_kind.rs | 147
 libclamav_rust/.cargo/vendor/bindgen/src/ir/layout.rs | 143
 libclamav_rust/.cargo/vendor/bindgen/src/ir/mod.rs | 24
 libclamav_rust/.cargo/vendor/bindgen/src/ir/module.rs | 95
 libclamav_rust/.cargo/vendor/bindgen/src/ir/objc.rs | 329
 libclamav_rust/.cargo/vendor/bindgen/src/ir/template.rs | 343
 libclamav_rust/.cargo/vendor/bindgen/src/ir/traversal.rs | 508
 libclamav_rust/.cargo/vendor/bindgen/src/ir/ty.rs | 1250 --
 libclamav_rust/.cargo/vendor/bindgen/src/ir/var.rs | 455
 libclamav_rust/.cargo/vendor/bindgen/src/lib.rs | 2729 ----
 libclamav_rust/.cargo/vendor/bindgen/src/log_stubs.rs | 32
 libclamav_rust/.cargo/vendor/bindgen/src/main.rs | 113
 libclamav_rust/.cargo/vendor/bindgen/src/options.rs | 1000 -
 libclamav_rust/.cargo/vendor/bindgen/src/parse.rs | 102
 libclamav_rust/.cargo/vendor/bindgen/src/regex_set.rs | 92
 libclamav_rust/.cargo/vendor/bindgen/src/time.rs | 52
 libclamav_rust/.cargo/vendor/bindgen/time.rs | 52
 libclamav_rust/.cargo/vendor/bumpalo/.cargo-checksum.json | 2
 libclamav_rust/.cargo/vendor/bumpalo/CHANGELOG.md | 17
 libclamav_rust/.cargo/vendor/bumpalo/Cargo.toml | 3
 libclamav_rust/.cargo/vendor/bumpalo/src/boxed.rs | 15
 libclamav_rust/.cargo/vendor/bumpalo/src/collections/string.rs | 18
 libclamav_rust/.cargo/vendor/bumpalo/src/collections/vec.rs | 20
 libclamav_rust/.cargo/vendor/bumpalo/src/lib.rs | 4
 libclamav_rust/.cargo/vendor/bytemuck/.cargo-checksum.json | 2
 libclamav_rust/.cargo/vendor/bytemuck/Cargo.toml | 8
 libclamav_rust/.cargo/vendor/bytemuck/README.md | 8
 libclamav_rust/.cargo/vendor/bytemuck/changelog.md | 5
 libclamav_rust/.cargo/vendor/bytemuck/src/allocation.rs | 35
 libclamav_rust/.cargo/vendor/bytemuck/src/anybitpattern.rs | 4
 libclamav_rust/.cargo/vendor/bytemuck/src/checked.rs | 48
 libclamav_rust/.cargo/vendor/bytemuck/src/lib.rs | 17
 libclamav_rust/.cargo/vendor/bytemuck/src/pod.rs | 285
 libclamav_rust/.cargo/vendor/bytemuck/src/zeroable.rs | 342
 libclamav_rust/.cargo/vendor/bytemuck/src/zeroable_in_option.rs | 8
 libclamav_rust/.cargo/vendor/bytemuck/tests/checked_tests.rs | 328
 libclamav_rust/.cargo/vendor/bytemuck/tests/derive.rs | 30
 libclamav_rust/.cargo/vendor/either/.cargo-checksum.json | 2
 libclamav_rust/.cargo/vendor/either/Cargo.toml | 4
 libclamav_rust/.cargo/vendor/either/README.rst | 4
 libclamav_rust/.cargo/vendor/env_logger/.cargo-checksum.json | 1
 libclamav_rust/.cargo/vendor/env_logger/CHANGELOG.md | 3
 libclamav_rust/.cargo/vendor/env_logger/Cargo.toml | 85
 libclamav_rust/.cargo/vendor/env_logger/LICENSE-APACHE | 201
 libclamav_rust/.cargo/vendor/env_logger/LICENSE-MIT | 23
 libclamav_rust/.cargo/vendor/env_logger/README.md | 183
 libclamav_rust/.cargo/vendor/env_logger/src/filter/mod.rs | 868 -
 libclamav_rust/.cargo/vendor/env_logger/src/filter/regex.rs | 29
 libclamav_rust/.cargo/vendor/env_logger/src/filter/string.rs | 24
 libclamav_rust/.cargo/vendor/env_logger/src/fmt/humantime/extern_impl.rs | 118
 libclamav_rust/.cargo/vendor/env_logger/src/fmt/humantime/mod.rs | 11
 libclamav_rust/.cargo/vendor/env_logger/src/fmt/humantime/shim_impl.rs | 5
 libclamav_rust/.cargo/vendor/env_logger/src/fmt/mod.rs | 652 -
 libclamav_rust/.cargo/vendor/env_logger/src/fmt/writer/atty.rs | 32
 libclamav_rust/.cargo/vendor/env_logger/src/fmt/writer/mod.rs | 252
 libclamav_rust/.cargo/vendor/env_logger/src/fmt/writer/termcolor/extern_impl.rs | 532
 libclamav_rust/.cargo/vendor/env_logger/src/fmt/writer/termcolor/mod.rs | 12
 libclamav_rust/.cargo/vendor/env_logger/src/fmt/writer/termcolor/shim_impl.rs | 72
 libclamav_rust/.cargo/vendor/env_logger/src/lib.rs | 1311 --
 libclamav_rust/.cargo/vendor/env_logger/tests/init-twice-retains-filter.rs | 40
 libclamav_rust/.cargo/vendor/env_logger/tests/log-in-log.rs | 39
 libclamav_rust/.cargo/vendor/env_logger/tests/log_tls_dtors.rs | 66
 libclamav_rust/.cargo/vendor/env_logger/tests/regexp_filter.rs | 57
 libclamav_rust/.cargo/vendor/exr/.cargo-checksum.json | 2
 libclamav_rust/.cargo/vendor/exr/Cargo.lock | 18
 libclamav_rust/.cargo/vendor/exr/Cargo.toml | 11
 libclamav_rust/.cargo/vendor/exr/README.md | 3
 libclamav_rust/.cargo/vendor/exr/benches/pixel_format_conversion.rs | 69
 libclamav_rust/.cargo/vendor/exr/benches/profiling.rs | 13
 libclamav_rust/.cargo/vendor/exr/benches/read.rs | 134
 libclamav_rust/.cargo/vendor/exr/examples/0a_write_rgba.rs | 1
 libclamav_rust/.cargo/vendor/exr/examples/README.md | 6
 libclamav_rust/.cargo/vendor/exr/src/compression/mod.rs | 236
 libclamav_rust/.cargo/vendor/exr/src/compression/pxr24.rs | 5
 libclamav_rust/.cargo/vendor/exr/src/compression/zip.rs | 5
 libclamav_rust/.cargo/vendor/exr/src/image/mod.rs | 11
 libclamav_rust/.cargo/vendor/flate2/.cargo-checksum.json | 2
 libclamav_rust/.cargo/vendor/flate2/Cargo.lock | 38
 libclamav_rust/.cargo/vendor/flate2/Cargo.toml | 8
 libclamav_rust/.cargo/vendor/futures-core/.cargo-checksum.json | 2
 libclamav_rust/.cargo/vendor/futures-core/Cargo.toml | 7
 libclamav_rust/.cargo/vendor/futures-core/src/task/__internal/atomic_waker.rs | 11
 libclamav_rust/.cargo/vendor/futures-sink/.cargo-checksum.json | 2
 libclamav_rust/.cargo/vendor/futures-sink/Cargo.toml | 2
 libclamav_rust/.cargo/vendor/glob/.cargo-checksum.json | 2
 libclamav_rust/.cargo/vendor/glob/Cargo.toml | 24
 libclamav_rust/.cargo/vendor/glob/README.md | 6
 libclamav_rust/.cargo/vendor/glob/src/lib.rs | 39
 libclamav_rust/.cargo/vendor/glob/triagebot.toml | 1
 libclamav_rust/.cargo/vendor/half/.cargo-checksum.json | 2
 libclamav_rust/.cargo/vendor/half/CHANGELOG.md | 24
 libclamav_rust/.cargo/vendor/half/Cargo.toml | 5
 libclamav_rust/.cargo/vendor/half/src/bfloat.rs | 109
 libclamav_rust/.cargo/vendor/half/src/bfloat/convert.rs | 4
 libclamav_rust/.cargo/vendor/half/src/binary16.rs | 109
 libclamav_rust/.cargo/vendor/half/src/binary16/convert.rs | 333
 libclamav_rust/.cargo/vendor/half/src/lib.rs | 18
 libclamav_rust/.cargo/vendor/half/src/slice.rs | 76
 libclamav_rust/.cargo/vendor/hermit-abi-0.1.19/.cargo-checksum.json | 1
 libclamav_rust/.cargo/vendor/hermit-abi-0.1.19/Cargo.toml | 44
 libclamav_rust/.cargo/vendor/hermit-abi-0.1.19/LICENSE-APACHE | 201
 libclamav_rust/.cargo/vendor/hermit-abi-0.1.19/LICENSE-MIT | 23
 libclamav_rust/.cargo/vendor/hermit-abi-0.1.19/README.md | 22
 libclamav_rust/.cargo/vendor/hermit-abi-0.1.19/rust-toolchain | 1
 libclamav_rust/.cargo/vendor/hermit-abi-0.1.19/src/lib.rs | 490
 libclamav_rust/.cargo/vendor/hermit-abi-0.1.19/src/tcplistener.rs | 13
 libclamav_rust/.cargo/vendor/hermit-abi-0.1.19/src/tcpstream.rs | 109
 libclamav_rust/.cargo/vendor/hermit-abi/.cargo-checksum.json | 2
 libclamav_rust/.cargo/vendor/hermit-abi/Cargo.toml | 44
 libclamav_rust/.cargo/vendor/hermit-abi/rust-toolchain | 1
 libclamav_rust/.cargo/vendor/hermit-abi/src/errno.rs | 397
 libclamav_rust/.cargo/vendor/hermit-abi/src/lib.rs | 76
 libclamav_rust/.cargo/vendor/humantime/.cargo-checksum.json | 1
 libclamav_rust/.cargo/vendor/humantime/Cargo.toml | 37
 libclamav_rust/.cargo/vendor/humantime/LICENSE-APACHE | 202
 libclamav_rust/.cargo/vendor/humantime/LICENSE-MIT | 26
 libclamav_rust/.cargo/vendor/humantime/README.md | 68
 libclamav_rust/.cargo/vendor/humantime/benches/datetime_format.rs | 56
 libclamav_rust/.cargo/vendor/humantime/benches/datetime_parse.rs | 47
 libclamav_rust/.cargo/vendor/humantime/bulk.yaml | 8
 libclamav_rust/.cargo/vendor/humantime/src/date.rs | 623 -
 libclamav_rust/.cargo/vendor/humantime/src/duration.rs | 456
 libclamav_rust/.cargo/vendor/humantime/src/lib.rs | 34
 libclamav_rust/.cargo/vendor/humantime/src/wrapper.rs | 107
 libclamav_rust/.cargo/vendor/humantime/vagga.yaml | 92
 libclamav_rust/.cargo/vendor/itoa/.cargo-checksum.json | 2
 libclamav_rust/.cargo/vendor/itoa/Cargo.toml | 2
 libclamav_rust/.cargo/vendor/itoa/README.md | 2
 libclamav_rust/.cargo/vendor/itoa/src/lib.rs | 2
 libclamav_rust/.cargo/vendor/js-sys/.cargo-checksum.json | 2
 libclamav_rust/.cargo/vendor/js-sys/Cargo.toml | 10
 libclamav_rust/.cargo/vendor/js-sys/src/lib.rs | 232
 libclamav_rust/.cargo/vendor/js-sys/tests/wasm/Array.rs | 29
 libclamav_rust/.cargo/vendor/js-sys/tests/wasm/Intl.rs | 20
 libclamav_rust/.cargo/vendor/js-sys/tests/wasm/JSON.rs | 1
 libclamav_rust/.cargo/vendor/js-sys/tests/wasm/JsString.rs | 2
 libclamav_rust/.cargo/vendor/js-sys/tests/wasm/Number.rs | 1
 libclamav_rust/.cargo/vendor/js-sys/tests/wasm/Object.rs | 1
 libclamav_rust/.cargo/vendor/js-sys/tests/wasm/Set.rs | 1
 libclamav_rust/.cargo/vendor/js-sys/tests/wasm/SharedArrayBuffer.rs | 1
 libclamav_rust/.cargo/vendor/js-sys/tests/wasm/Temporal.rs | 1
 libclamav_rust/.cargo/vendor/js-sys/tests/wasm/TypedArray.rs | 1
 libclamav_rust/.cargo/vendor/js-sys/tests/wasm/WeakMap.rs | 1
 libclamav_rust/.cargo/vendor/js-sys/tests/wasm/WeakSet.rs | 1
 libclamav_rust/.cargo/vendor/js-sys/tests/wasm/WebAssembly.rs | 2
 libclamav_rust/.cargo/vendor/libc/.cargo-checksum.json | 2
 libclamav_rust/.cargo/vendor/libc/CONTRIBUTING.md | 2
 libclamav_rust/.cargo/vendor/libc/Cargo.toml | 2
 libclamav_rust/.cargo/vendor/libc/build.rs | 16
 libclamav_rust/.cargo/vendor/libc/src/fuchsia/mod.rs | 12
 libclamav_rust/.cargo/vendor/libc/src/unix/bsd/freebsdlike/freebsd/aarch64.rs | 22
 libclamav_rust/.cargo/vendor/libc/src/unix/bsd/freebsdlike/freebsd/freebsd13/mod.rs | 9
 libclamav_rust/.cargo/vendor/libc/src/unix/bsd/freebsdlike/freebsd/freebsd14/mod.rs | 9
 libclamav_rust/.cargo/vendor/libc/src/unix/bsd/freebsdlike/freebsd/mod.rs | 51
 libclamav_rust/.cargo/vendor/libc/src/unix/bsd/freebsdlike/freebsd/riscv64.rs | 22
 libclamav_rust/.cargo/vendor/libc/src/unix/bsd/mod.rs | 15
 libclamav_rust/.cargo/vendor/libc/src/unix/bsd/netbsdlike/netbsd/mod.rs | 24
 libclamav_rust/.cargo/vendor/libc/src/unix/haiku/mod.rs | 16
 libclamav_rust/.cargo/vendor/libc/src/unix/hermit/mod.rs | 1
 libclamav_rust/.cargo/vendor/libc/src/unix/linux_like/android/b32/arm.rs | 3
 libclamav_rust/.cargo/vendor/libc/src/unix/linux_like/android/b32/x86/mod.rs | 3
 libclamav_rust/.cargo/vendor/libc/src/unix/linux_like/android/b64/aarch64/mod.rs | 3
 libclamav_rust/.cargo/vendor/libc/src/unix/linux_like/android/b64/x86_64/mod.rs | 3
 libclamav_rust/.cargo/vendor/libc/src/unix/linux_like/android/mod.rs | 282
 libclamav_rust/.cargo/vendor/libc/src/unix/linux_like/emscripten/mod.rs | 1
 libclamav_rust/.cargo/vendor/libc/src/unix/linux_like/linux/arch/generic/mod.rs | 4
 libclamav_rust/.cargo/vendor/libc/src/unix/linux_like/linux/gnu/b64/aarch64/align.rs | 7
 libclamav_rust/.cargo/vendor/libc/src/unix/linux_like/linux/gnu/b64/aarch64/mod.rs | 1
 libclamav_rust/.cargo/vendor/libc/src/unix/linux_like/linux/gnu/b64/loongarch64/align.rs | 33
 libclamav_rust/.cargo/vendor/libc/src/unix/linux_like/linux/gnu/b64/loongarch64/mod.rs | 19
 libclamav_rust/.cargo/vendor/libc/src/unix/linux_like/linux/mod.rs | 36
 libclamav_rust/.cargo/vendor/libc/src/unix/linux_like/linux/musl/mod.rs | 2
 libclamav_rust/.cargo/vendor/libc/src/unix/linux_like/linux/uclibc/arm/mod.rs | 2
 libclamav_rust/.cargo/vendor/libc/src/unix/linux_like/linux/uclibc/mips/mips32/mod.rs | 1
 libclamav_rust/.cargo/vendor/libc/src/unix/linux_like/mod.rs | 14
 libclamav_rust/.cargo/vendor/libc/src/unix/mod.rs | 155
 libclamav_rust/.cargo/vendor/libc/src/unix/newlib/mod.rs | 1
 libclamav_rust/.cargo/vendor/libc/src/unix/nto/aarch64.rs | 36
 libclamav_rust/.cargo/vendor/libc/src/unix/nto/mod.rs | 3286 +++++
 libclamav_rust/.cargo/vendor/libc/src/unix/nto/neutrino.rs | 1288 ++
 libclamav_rust/.cargo/vendor/libc/src/unix/nto/x86_64.rs | 132
 libclamav_rust/.cargo/vendor/libc/src/unix/redox/mod.rs | 20
 libclamav_rust/.cargo/vendor/libc/src/unix/solarish/compat.rs | 49
 libclamav_rust/.cargo/vendor/libc/src/unix/solarish/mod.rs | 34
 libclamav_rust/.cargo/vendor/libc/src/unix/solarish/solaris.rs | 48
 libclamav_rust/.cargo/vendor/libc/src/vxworks/mod.rs | 5
 libclamav_rust/.cargo/vendor/libc/src/wasi.rs | 9
 libclamav_rust/.cargo/vendor/libc/src/windows/mod.rs | 6
 libclamav_rust/.cargo/vendor/miniz_oxide-0.5.4/.cargo-checksum.json | 1
 libclamav_rust/.cargo/vendor/miniz_oxide-0.5.4/Cargo.toml | 72
 libclamav_rust/.cargo/vendor/miniz_oxide-0.5.4/LICENSE | 21
 libclamav_rust/.cargo/vendor/miniz_oxide-0.5.4/LICENSE-APACHE.md | 177
 libclamav_rust/.cargo/vendor/miniz_oxide-0.5.4/LICENSE-MIT.md | 21
 libclamav_rust/.cargo/vendor/miniz_oxide-0.5.4/LICENSE-ZLIB.md | 11
 libclamav_rust/.cargo/vendor/miniz_oxide-0.5.4/Readme.md | 35
 libclamav_rust/.cargo/vendor/miniz_oxide-0.5.4/src/deflate/buffer.rs | 58
 libclamav_rust/.cargo/vendor/miniz_oxide-0.5.4/src/deflate/core.rs | 2463 ----
 libclamav_rust/.cargo/vendor/miniz_oxide-0.5.4/src/deflate/mod.rs | 227
 libclamav_rust/.cargo/vendor/miniz_oxide-0.5.4/src/deflate/stream.rs | 121
 libclamav_rust/.cargo/vendor/miniz_oxide-0.5.4/src/inflate/core.rs | 1931 ---
 libclamav_rust/.cargo/vendor/miniz_oxide-0.5.4/src/inflate/mod.rs | 277
 libclamav_rust/.cargo/vendor/miniz_oxide-0.5.4/src/inflate/output_buffer.rs | 60
 libclamav_rust/.cargo/vendor/miniz_oxide-0.5.4/src/inflate/stream.rs | 415
 libclamav_rust/.cargo/vendor/miniz_oxide-0.5.4/src/lib.rs | 208
 libclamav_rust/.cargo/vendor/miniz_oxide-0.5.4/src/shared.rs | 25
 libclamav_rust/.cargo/vendor/nom/.cargo-checksum.json | 2
 libclamav_rust/.cargo/vendor/nom/CHANGELOG.md | 71
 libclamav_rust/.cargo/vendor/nom/Cargo.lock | 2
 libclamav_rust/.cargo/vendor/nom/Cargo.toml | 49
 libclamav_rust/.cargo/vendor/nom/README.md | 22
 libclamav_rust/.cargo/vendor/nom/src/bits/complete.rs | 47
 libclamav_rust/.cargo/vendor/nom/src/bits/mod.rs | 10
 libclamav_rust/.cargo/vendor/nom/src/bits/streaming.rs | 41
 libclamav_rust/.cargo/vendor/nom/src/branch/mod.rs | 24
 libclamav_rust/.cargo/vendor/nom/src/character/complete.rs | 8
 libclamav_rust/.cargo/vendor/nom/src/combinator/mod.rs | 53
 libclamav_rust/.cargo/vendor/nom/src/internal.rs | 4
 libclamav_rust/.cargo/vendor/nom/src/lib.rs | 7
 libclamav_rust/.cargo/vendor/nom/src/macros.rs | 23
 libclamav_rust/.cargo/vendor/nom/src/multi/mod.rs | 152
 libclamav_rust/.cargo/vendor/nom/src/number/complete.rs | 78
 libclamav_rust/.cargo/vendor/nom/src/number/streaming.rs | 76
 libclamav_rust/.cargo/vendor/nom/src/sequence/mod.rs | 9
 libclamav_rust/.cargo/vendor/nom/src/sequence/tests.rs | 18
 libclamav_rust/.cargo/vendor/nom/src/traits.rs | 4
 libclamav_rust/.cargo/vendor/nom/tests/issues.rs | 26
 libclamav_rust/.cargo/vendor/num-complex/.cargo-checksum.json | 2
 libclamav_rust/.cargo/vendor/num-complex/Cargo.toml | 14
 libclamav_rust/.cargo/vendor/num-complex/RELEASES.md | 6
 libclamav_rust/.cargo/vendor/num-complex/src/lib.rs | 6
 libclamav_rust/.cargo/vendor/num_cpus/.cargo-checksum.json | 2
 libclamav_rust/.cargo/vendor/num_cpus/CHANGELOG.md | 6
 libclamav_rust/.cargo/vendor/num_cpus/Cargo.lock | 6
 libclamav_rust/.cargo/vendor/num_cpus/Cargo.toml | 4
 libclamav_rust/.cargo/vendor/once_cell/.cargo-checksum.json | 2
 libclamav_rust/.cargo/vendor/once_cell/CHANGELOG.md | 4
 libclamav_rust/.cargo/vendor/once_cell/Cargo.lock | 2
 libclamav_rust/.cargo/vendor/once_cell/Cargo.toml | 2
 libclamav_rust/.cargo/vendor/once_cell/README.md | 2
 libclamav_rust/.cargo/vendor/once_cell/src/lib.rs | 14
 libclamav_rust/.cargo/vendor/once_cell/src/race.rs | 92
 libclamav_rust/.cargo/vendor/prettyplease/.cargo-checksum.json | 1
 libclamav_rust/.cargo/vendor/prettyplease/Cargo.toml | 50
 libclamav_rust/.cargo/vendor/prettyplease/LICENSE-APACHE | 176
 libclamav_rust/.cargo/vendor/prettyplease/LICENSE-MIT | 23
 libclamav_rust/.cargo/vendor/prettyplease/README.md | 312
 libclamav_rust/.cargo/vendor/prettyplease/build.rs | 5
 libclamav_rust/.cargo/vendor/prettyplease/examples/input.rs | 1
 libclamav_rust/.cargo/vendor/prettyplease/examples/output.prettyplease.rs | 593 +
 libclamav_rust/.cargo/vendor/prettyplease/examples/output.rustc.rs | 508
 libclamav_rust/.cargo/vendor/prettyplease/examples/output.rustfmt.rs | 552
 libclamav_rust/.cargo/vendor/prettyplease/src/algorithm.rs | 376
 libclamav_rust/.cargo/vendor/prettyplease/src/attr.rs | 287
 libclamav_rust/.cargo/vendor/prettyplease/src/convenience.rs | 98
 libclamav_rust/.cargo/vendor/prettyplease/src/data.rs | 78
 libclamav_rust/.cargo/vendor/prettyplease/src/expr.rs | 1160 ++
 libclamav_rust/.cargo/vendor/prettyplease/src/file.rs | 17
 libclamav_rust/.cargo/vendor/prettyplease/src/generics.rs | 325
 libclamav_rust/.cargo/vendor/prettyplease/src/item.rs | 1646 ++
 libclamav_rust/.cargo/vendor/prettyplease/src/iter.rs | 46
 libclamav_rust/.cargo/vendor/prettyplease/src/lib.rs | 379
 libclamav_rust/.cargo/vendor/prettyplease/src/lifetime.rs | 9
 libclamav_rust/.cargo/vendor/prettyplease/src/lit.rs | 52
 libclamav_rust/.cargo/vendor/prettyplease/src/mac.rs | 219
 libclamav_rust/.cargo/vendor/prettyplease/src/pat.rs | 246
 libclamav_rust/.cargo/vendor/prettyplease/src/path.rs | 207
 libclamav_rust/.cargo/vendor/prettyplease/src/ring.rs | 81
 libclamav_rust/.cargo/vendor/prettyplease/src/stmt.rs | 209
 libclamav_rust/.cargo/vendor/prettyplease/src/token.rs | 80
 libclamav_rust/.cargo/vendor/prettyplease/src/ty.rs | 286
 libclamav_rust/.cargo/vendor/proc-macro2/.cargo-checksum.json | 2
 libclamav_rust/.cargo/vendor/proc-macro2/Cargo.toml | 13
 libclamav_rust/.cargo/vendor/proc-macro2/LICENSE-APACHE | 25
 libclamav_rust/.cargo/vendor/proc-macro2/LICENSE-MIT | 2
 libclamav_rust/.cargo/vendor/proc-macro2/README.md | 4
 libclamav_rust/.cargo/vendor/proc-macro2/build.rs | 74
 libclamav_rust/.cargo/vendor/proc-macro2/rust-toolchain.toml | 2
 libclamav_rust/.cargo/vendor/proc-macro2/src/extra.rs | 84
 libclamav_rust/.cargo/vendor/proc-macro2/src/fallback.rs | 282
 libclamav_rust/.cargo/vendor/proc-macro2/src/lib.rs | 131
 libclamav_rust/.cargo/vendor/proc-macro2/src/location.rs | 29
 libclamav_rust/.cargo/vendor/proc-macro2/src/marker.rs | 4
 libclamav_rust/.cargo/vendor/proc-macro2/src/parse.rs | 303
 libclamav_rust/.cargo/vendor/proc-macro2/src/rcvec.rs | 9
 libclamav_rust/.cargo/vendor/proc-macro2/src/wrapper.rs | 151
 libclamav_rust/.cargo/vendor/proc-macro2/tests/marker.rs | 3
 libclamav_rust/.cargo/vendor/proc-macro2/tests/test.rs | 140
 libclamav_rust/.cargo/vendor/proc-macro2/tests/test_fmt.rs | 2
 libclamav_rust/.cargo/vendor/proc-macro2/tests/test_size.rs | 42
 libclamav_rust/.cargo/vendor/quote/.cargo-checksum.json | 2
 libclamav_rust/.cargo/vendor/quote/Cargo.toml | 9
 libclamav_rust/.cargo/vendor/quote/LICENSE-APACHE | 25
 libclamav_rust/.cargo/vendor/quote/LICENSE-MIT | 2
 libclamav_rust/.cargo/vendor/quote/README.md | 2
 libclamav_rust/.cargo/vendor/quote/src/lib.rs | 12
 libclamav_rust/.cargo/vendor/quote/src/runtime.rs | 60
 libclamav_rust/.cargo/vendor/quote/src/spanned.rs | 21
 libclamav_rust/.cargo/vendor/quote/tests/test.rs | 35
 libclamav_rust/.cargo/vendor/quote/tests/ui/does-not-have-iter-interpolated-dup.stderr | 2
 libclamav_rust/.cargo/vendor/quote/tests/ui/does-not-have-iter-interpolated.stderr | 2
 libclamav_rust/.cargo/vendor/quote/tests/ui/does-not-have-iter-separated.stderr | 2
 libclamav_rust/.cargo/vendor/quote/tests/ui/does-not-have-iter.stderr | 2
 libclamav_rust/.cargo/vendor/quote/tests/ui/not-quotable.stderr | 5
 libclamav_rust/.cargo/vendor/quote/tests/ui/not-repeatable.stderr | 66
 libclamav_rust/.cargo/vendor/quote/tests/ui/wrong-type-span.stderr | 10
 libclamav_rust/.cargo/vendor/rayon-core/.cargo-checksum.json | 2
 libclamav_rust/.cargo/vendor/rayon-core/Cargo.toml | 3
 libclamav_rust/.cargo/vendor/rayon-core/src/broadcast/mod.rs | 5
 libclamav_rust/.cargo/vendor/rayon-core/src/job.rs | 2
 libclamav_rust/.cargo/vendor/rayon-core/src/latch.rs | 90
 libclamav_rust/.cargo/vendor/rayon-core/src/registry.rs | 10
 libclamav_rust/.cargo/vendor/rayon-core/src/scope/mod.rs | 70
 libclamav_rust/.cargo/vendor/rayon-core/tests/stack_overflow_crash.rs | 99
 libclamav_rust/.cargo/vendor/rayon/.cargo-checksum.json | 2
 libclamav_rust/.cargo/vendor/rayon/Cargo.toml | 5
 libclamav_rust/.cargo/vendor/rayon/RELEASES.md | 12
 libclamav_rust/.cargo/vendor/rayon/src/iter/par_bridge.rs | 159
 libclamav_rust/.cargo/vendor/rayon/tests/par_bridge_recursion.rs | 30
 libclamav_rust/.cargo/vendor/regex/.cargo-checksum.json | 2
 libclamav_rust/.cargo/vendor/regex/CHANGELOG.md | 16
 libclamav_rust/.cargo/vendor/regex/Cargo.lock | 30
 libclamav_rust/.cargo/vendor/regex/Cargo.toml | 2
 libclamav_rust/.cargo/vendor/regex/src/re_bytes.rs | 12
 libclamav_rust/.cargo/vendor/regex/src/re_unicode.rs | 12
 libclamav_rust/.cargo/vendor/regex/tests/replace.rs | 18
 libclamav_rust/.cargo/vendor/ryu/.cargo-checksum.json | 2
 libclamav_rust/.cargo/vendor/ryu/Cargo.lock | 30
 libclamav_rust/.cargo/vendor/ryu/Cargo.toml | 2
 libclamav_rust/.cargo/vendor/ryu/README.md | 2
 libclamav_rust/.cargo/vendor/ryu/src/lib.rs | 2
 libclamav_rust/.cargo/vendor/ryu/src/pretty/mantissa.rs | 6
 libclamav_rust/.cargo/vendor/ryu/src/pretty/mod.rs | 3
 libclamav_rust/.cargo/vendor/ryu/src/s2f.rs | 2
 libclamav_rust/.cargo/vendor/serde/.cargo-checksum.json | 2
 libclamav_rust/.cargo/vendor/serde/Cargo.toml | 9
 libclamav_rust/.cargo/vendor/serde/README.md | 4
 libclamav_rust/.cargo/vendor/serde/build.rs | 38
 libclamav_rust/.cargo/vendor/serde/crates-io.md | 2
 libclamav_rust/.cargo/vendor/serde/src/de/format.rs | 2
 libclamav_rust/.cargo/vendor/serde/src/de/impls.rs | 44
 libclamav_rust/.cargo/vendor/serde/src/de/mod.rs | 5
 libclamav_rust/.cargo/vendor/serde/src/lib.rs | 37
 libclamav_rust/.cargo/vendor/serde/src/private/de.rs | 14
 libclamav_rust/.cargo/vendor/serde/src/private/ser.rs | 2
 libclamav_rust/.cargo/vendor/serde/src/ser/impls.rs | 70
 libclamav_rust/.cargo/vendor/serde/src/ser/mod.rs | 10
 libclamav_rust/.cargo/vendor/serde_derive/.cargo-checksum.json | 2
 libclamav_rust/.cargo/vendor/serde_derive/Cargo.toml | 4
 libclamav_rust/.cargo/vendor/serde_derive/README.md | 4
 libclamav_rust/.cargo/vendor/serde_derive/build.rs | 2
 libclamav_rust/.cargo/vendor/serde_derive/crates-io.md | 2
 libclamav_rust/.cargo/vendor/serde_derive/src/de.rs | 186
 libclamav_rust/.cargo/vendor/serde_derive/src/internals/check.rs | 23
 libclamav_rust/.cargo/vendor/serde_derive/src/lib.rs | 4
 libclamav_rust/.cargo/vendor/serde_derive/src/ser.rs | 62
 libclamav_rust/.cargo/vendor/serde_derive/src/this.rs | 32
 libclamav_rust/.cargo/vendor/serde_json/.cargo-checksum.json | 2
 libclamav_rust/.cargo/vendor/serde_json/Cargo.toml | 9
 libclamav_rust/.cargo/vendor/serde_json/LICENSE-APACHE | 25
 libclamav_rust/.cargo/vendor/serde_json/README.md | 28
 libclamav_rust/.cargo/vendor/serde_json/build.rs | 2
 libclamav_rust/.cargo/vendor/serde_json/src/lib.rs | 20
 libclamav_rust/.cargo/vendor/serde_json/src/raw.rs | 2
 libclamav_rust/.cargo/vendor/serde_json/src/value/de.rs | 4
 libclamav_rust/.cargo/vendor/serde_json/src/value/mod.rs | 8
 libclamav_rust/.cargo/vendor/serde_json/src/value/ser.rs | 34
 libclamav_rust/.cargo/vendor/serde_json/tests/test.rs | 22
 libclamav_rust/.cargo/vendor/simd-adler32/.cargo-checksum.json | 1
 libclamav_rust/.cargo/vendor/simd-adler32/CHANGELOG.md | 12
 libclamav_rust/.cargo/vendor/simd-adler32/Cargo.toml | 77
 libclamav_rust/.cargo/vendor/simd-adler32/LICENSE.md | 21
 libclamav_rust/.cargo/vendor/simd-adler32/README.md | 131
 libclamav_rust/.cargo/vendor/simd-adler32/src/hash.rs | 156
 libclamav_rust/.cargo/vendor/simd-adler32/src/imp/avx2.rs | 214
 libclamav_rust/.cargo/vendor/simd-adler32/src/imp/avx512.rs | 242
 libclamav_rust/.cargo/vendor/simd-adler32/src/imp/mod.rs | 23
 libclamav_rust/.cargo/vendor/simd-adler32/src/imp/neon.rs | 241
 libclamav_rust/.cargo/vendor/simd-adler32/src/imp/scalar.rs | 66
 libclamav_rust/.cargo/vendor/simd-adler32/src/imp/sse2.rs | 233
 libclamav_rust/.cargo/vendor/simd-adler32/src/imp/ssse3.rs | 219
 libclamav_rust/.cargo/vendor/simd-adler32/src/imp/wasm.rs | 217
 libclamav_rust/.cargo/vendor/simd-adler32/src/lib.rs | 310
 libclamav_rust/.cargo/vendor/spin/.cargo-checksum.json | 2
 libclamav_rust/.cargo/vendor/spin/CHANGELOG.md | 17
 libclamav_rust/.cargo/vendor/spin/Cargo.lock | 17
 libclamav_rust/.cargo/vendor/spin/Cargo.toml | 4
 libclamav_rust/.cargo/vendor/spin/README.md | 5
 libclamav_rust/.cargo/vendor/spin/src/barrier.rs | 16
 libclamav_rust/.cargo/vendor/spin/src/lazy.rs | 12
 libclamav_rust/.cargo/vendor/spin/src/lib.rs | 41
 libclamav_rust/.cargo/vendor/spin/src/mutex.rs | 20
 libclamav_rust/.cargo/vendor/spin/src/mutex/fair.rs | 732 +
 libclamav_rust/.cargo/vendor/spin/src/mutex/spin.rs | 63
 libclamav_rust/.cargo/vendor/spin/src/mutex/ticket.rs | 15
 libclamav_rust/.cargo/vendor/spin/src/once.rs | 125
 libclamav_rust/.cargo/vendor/spin/src/relax.rs | 5
 libclamav_rust/.cargo/vendor/spin/src/rwlock.rs | 96
 libclamav_rust/.cargo/vendor/syn-1.0.107/.cargo-checksum.json | 1
 libclamav_rust/.cargo/vendor/syn-1.0.107/Cargo.toml | 147
 libclamav_rust/.cargo/vendor/syn-1.0.107/LICENSE-APACHE | 201
 libclamav_rust/.cargo/vendor/syn-1.0.107/LICENSE-MIT | 23
 libclamav_rust/.cargo/vendor/syn-1.0.107/README.md | 285
 libclamav_rust/.cargo/vendor/syn-1.0.107/benches/file.rs | 55
 libclamav_rust/.cargo/vendor/syn-1.0.107/benches/rust.rs | 170
 libclamav_rust/.cargo/vendor/syn-1.0.107/build.rs | 51
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/attr.rs | 662 +
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/await.rs | 2
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/bigint.rs | 66
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/buffer.rs | 398
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/custom_keyword.rs | 253
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/custom_punctuation.rs | 300
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/data.rs | 493
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/derive.rs | 274
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/discouraged.rs | 194
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/drops.rs | 58
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/error.rs | 428
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/export.rs | 39
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/expr.rs | 3558 ++++++
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/ext.rs | 139
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/file.rs | 125
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/gen/clone.rs | 2241 +++
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/gen/debug.rs | 3042 +++++
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/gen/eq.rs | 2195 +++
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/gen/fold.rs | 3341 +++++
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/gen/hash.rs | 2869 +++++
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/gen/visit.rs | 3786 ++++++
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/gen/visit_mut.rs | 3786 ++++++
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/gen_helper.rs | 154
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/generics.rs | 1362 ++
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/group.rs | 282
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/ident.rs | 101
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/item.rs | 3313 +++++
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/lib.rs | 984 +
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/lifetime.rs | 154
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/lit.rs | 1600 ++
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/lookahead.rs | 169
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/mac.rs | 219
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/macros.rs | 177
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/op.rs | 234
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/parse.rs | 1314 ++
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/parse_macro_input.rs | 179
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/parse_quote.rs | 167
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/pat.rs | 927 +
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/path.rs | 854 +
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/print.rs | 16
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/punctuated.rs | 1087 +
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/reserved.rs | 44
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/sealed.rs | 4
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/span.rs | 67
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/spanned.rs | 114
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/stmt.rs | 349
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/thread.rs | 41
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/token.rs | 1013 +
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/tt.rs | 107
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/ty.rs | 1304 ++
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/verbatim.rs | 33
 libclamav_rust/.cargo/vendor/syn-1.0.107/src/whitespace.rs | 65
 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/common/eq.rs | 806 +
 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/common/mod.rs | 28
 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/common/parse.rs | 48
 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/debug/gen.rs | 5640 ++++++++++
 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/debug/mod.rs | 125
 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/macros/mod.rs | 79
 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/regression.rs | 3
 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/regression/issue1108.rs | 5
 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/regression/issue1235.rs | 32
 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/repo/mod.rs | 215
 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/repo/progress.rs | 37
 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_asyncness.rs | 37
 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_attribute.rs | 336
 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_derive_input.rs | 894 +
 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_expr.rs | 306
 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_generics.rs | 285
 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_grouping.rs | 52
 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_ident.rs | 85
 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_item.rs | 336
 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_iterators.rs | 68
 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_lit.rs | 266
 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_meta.rs | 378
 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_parse_buffer.rs | 92
 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_parse_stream.rs | 12
 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_pat.rs | 67
 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_path.rs | 126
 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_precedence.rs | 460
 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_receiver.rs | 127
 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_round_trip.rs | 241
 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_shebang.rs | 59
 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_should_parse.rs | 45
 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_size.rs | 29
 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_stmt.rs | 93
 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_token_trees.rs | 30
 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_ty.rs | 352
 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_visibility.rs | 148
 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/zzz_stable.rs | 33
 libclamav_rust/.cargo/vendor/syn/.cargo-checksum.json | 2
 libclamav_rust/.cargo/vendor/syn/Cargo.toml | 39
 libclamav_rust/.cargo/vendor/syn/LICENSE-APACHE | 25
 libclamav_rust/.cargo/vendor/syn/README.md | 11
 libclamav_rust/.cargo/vendor/syn/benches/file.rs | 10
 libclamav_rust/.cargo/vendor/syn/benches/rust.rs | 25
 libclamav_rust/.cargo/vendor/syn/build.rs | 51
 libclamav_rust/.cargo/vendor/syn/src/attr.rs | 656 -
 libclamav_rust/.cargo/vendor/syn/src/await.rs | 2
 libclamav_rust/.cargo/vendor/syn/src/bigint.rs | 6
 libclamav_rust/.cargo/vendor/syn/src/buffer.rs | 79
 libclamav_rust/.cargo/vendor/syn/src/custom_keyword.rs | 44
 libclamav_rust/.cargo/vendor/syn/src/custom_punctuation.rs | 30
 libclamav_rust/.cargo/vendor/syn/src/data.rs | 211
 libclamav_rust/.cargo/vendor/syn/src/derive.rs | 41
 libclamav_rust/.cargo/vendor/syn/src/discouraged.rs | 25
 libclamav_rust/.cargo/vendor/syn/src/drops.rs | 58
 libclamav_rust/.cargo/vendor/syn/src/error.rs | 152
 libclamav_rust/.cargo/vendor/syn/src/export.rs | 13
 libclamav_rust/.cargo/vendor/syn/src/expr.rs | 1346 +-
 libclamav_rust/.cargo/vendor/syn/src/ext.rs | 8
 libclamav_rust/.cargo/vendor/syn/src/file.rs | 4
 libclamav_rust/.cargo/vendor/syn/src/gen/clone.rs | 405
 libclamav_rust/.cargo/vendor/syn/src/gen/debug.rs | 2923 ++---
 libclamav_rust/.cargo/vendor/syn/src/gen/eq.rs | 446
 libclamav_rust/.cargo/vendor/syn/src/gen/fold.rs | 1204 --
 libclamav_rust/.cargo/vendor/syn/src/gen/hash.rs | 558
 libclamav_rust/.cargo/vendor/syn/src/gen/visit.rs | 1413 +-
 libclamav_rust/.cargo/vendor/syn/src/gen/visit_mut.rs | 1478 +-
 libclamav_rust/.cargo/vendor/syn/src/gen_helper.rs | 124
 libclamav_rust/.cargo/vendor/syn/src/generics.rs | 410
 libclamav_rust/.cargo/vendor/syn/src/group.rs | 11
 libclamav_rust/.cargo/vendor/syn/src/ident.rs | 115
 libclamav_rust/.cargo/vendor/syn/src/item.rs | 1324 +-
 libclamav_rust/.cargo/vendor/syn/src/lib.rs | 423
 libclamav_rust/.cargo/vendor/syn/src/lifetime.rs | 2
 libclamav_rust/.cargo/vendor/syn/src/lit.rs | 252
 libclamav_rust/.cargo/vendor/syn/src/lookahead.rs | 8
 libclamav_rust/.cargo/vendor/syn/src/mac.rs | 64
 libclamav_rust/.cargo/vendor/syn/src/macros.rs | 9
 libclamav_rust/.cargo/vendor/syn/src/meta.rs | 420
 libclamav_rust/.cargo/vendor/syn/src/op.rs | 70
 libclamav_rust/.cargo/vendor/syn/src/parse.rs | 102
 libclamav_rust/.cargo/vendor/syn/src/parse_macro_input.rs | 57
 libclamav_rust/.cargo/vendor/syn/src/parse_quote.rs | 27
 libclamav_rust/.cargo/vendor/syn/src/pat.rs | 700 -
 libclamav_rust/.cargo/vendor/syn/src/path.rs | 472
 libclamav_rust/.cargo/vendor/syn/src/print.rs | 2
 libclamav_rust/.cargo/vendor/syn/src/punctuated.rs | 161
 libclamav_rust/.cargo/vendor/syn/src/reserved.rs | 44
 libclamav_rust/.cargo/vendor/syn/src/restriction.rs | 171
 libclamav_rust/.cargo/vendor/syn/src/sealed.rs | 2
 libclamav_rust/.cargo/vendor/syn/src/span.rs | 37
 libclamav_rust/.cargo/vendor/syn/src/spanned.rs | 15
 libclamav_rust/.cargo/vendor/syn/src/stmt.rs | 246
 libclamav_rust/.cargo/vendor/syn/src/thread.rs | 15
 libclamav_rust/.cargo/vendor/syn/src/token.rs | 605 -
 libclamav_rust/.cargo/vendor/syn/src/tt.rs | 4
 libclamav_rust/.cargo/vendor/syn/src/ty.rs | 480
 libclamav_rust/.cargo/vendor/syn/src/verbatim.rs | 2
 libclamav_rust/.cargo/vendor/syn/src/whitespace.rs | 2
 libclamav_rust/.cargo/vendor/syn/tests/common/eq.rs | 178
 libclamav_rust/.cargo/vendor/syn/tests/common/parse.rs | 5
 libclamav_rust/.cargo/vendor/syn/tests/debug/gen.rs | 3841 ++--
 libclamav_rust/.cargo/vendor/syn/tests/debug/mod.rs | 18
 libclamav_rust/.cargo/vendor/syn/tests/regression.rs | 2
 libclamav_rust/.cargo/vendor/syn/tests/regression/issue1108.rs | 2
 libclamav_rust/.cargo/vendor/syn/tests/repo/mod.rs | 211
 libclamav_rust/.cargo/vendor/syn/tests/test_asyncness.rs | 8
 libclamav_rust/.cargo/vendor/syn/tests/test_attribute.rs | 171
 libclamav_rust/.cargo/vendor/syn/tests/test_derive_input.rs | 277
 libclamav_rust/.cargo/vendor/syn/tests/test_expr.rs | 128
 libclamav_rust/.cargo/vendor/syn/tests/test_generics.rs | 69
 libclamav_rust/.cargo/vendor/syn/tests/test_grouping.rs | 9
 libclamav_rust/.cargo/vendor/syn/tests/test_item.rs | 54
 libclamav_rust/.cargo/vendor/syn/tests/test_iterators.rs | 21
 libclamav_rust/.cargo/vendor/syn/tests/test_lit.rs | 11
 libclamav_rust/.cargo/vendor/syn/tests/test_meta.rs | 271
 libclamav_rust/.cargo/vendor/syn/tests/test_parse_buffer.rs | 1
 libclamav_rust/.cargo/vendor/syn/tests/test_parse_stream.rs | 10
 libclamav_rust/.cargo/vendor/syn/tests/test_pat.rs | 52
 libclamav_rust/.cargo/vendor/syn/tests/test_path.rs | 38
 libclamav_rust/.cargo/vendor/syn/tests/test_precedence.rs | 241
 libclamav_rust/.cargo/vendor/syn/tests/test_receiver.rs | 328
 libclamav_rust/.cargo/vendor/syn/tests/test_round_trip.rs | 40
 libclamav_rust/.cargo/vendor/syn/tests/test_shebang.rs | 30
 libclamav_rust/.cargo/vendor/syn/tests/test_size.rs | 15
 libclamav_rust/.cargo/vendor/syn/tests/test_stmt.rs | 187
 libclamav_rust/.cargo/vendor/syn/tests/test_token_trees.rs | 2
 libclamav_rust/.cargo/vendor/syn/tests/test_ty.rs | 53
 libclamav_rust/.cargo/vendor/syn/tests/test_visibility.rs | 12
 libclamav_rust/.cargo/vendor/termcolor/.cargo-checksum.json | 1
 libclamav_rust/.cargo/vendor/termcolor/COPYING | 3
 libclamav_rust/.cargo/vendor/termcolor/Cargo.toml | 43
 libclamav_rust/.cargo/vendor/termcolor/LICENSE-MIT | 21
 libclamav_rust/.cargo/vendor/termcolor/README.md | 115
 libclamav_rust/.cargo/vendor/termcolor/UNLICENSE | 24
 libclamav_rust/.cargo/vendor/termcolor/rustfmt.toml | 2
 libclamav_rust/.cargo/vendor/termcolor/src/lib.rs | 2260 ----
 libclamav_rust/.cargo/vendor/thiserror-impl/.cargo-checksum.json | 2
 libclamav_rust/.cargo/vendor/thiserror-impl/Cargo.toml | 2
 libclamav_rust/.cargo/vendor/thiserror/.cargo-checksum.json | 2
 libclamav_rust/.cargo/vendor/thiserror/Cargo.toml | 6
 libclamav_rust/.cargo/vendor/thiserror/README.md | 2
 libclamav_rust/.cargo/vendor/thiserror/tests/test_display.rs | 31
 libclamav_rust/.cargo/vendor/thiserror/tests/ui/no-display.stderr | 30
 libclamav_rust/.cargo/vendor/thiserror/tests/ui/source-enum-not-error.stderr | 2
 libclamav_rust/.cargo/vendor/thiserror/tests/ui/source-struct-not-error.stderr | 38
 libclamav_rust/.cargo/vendor/tiff/.cargo-checksum.json | 2
 libclamav_rust/.cargo/vendor/tiff/CHANGES.md | 13
 libclamav_rust/.cargo/vendor/tiff/Cargo.toml | 2
 libclamav_rust/.cargo/vendor/tiff/src/decoder/image.rs | 58
 libclamav_rust/.cargo/vendor/tiff/src/decoder/stream.rs | 66
 libclamav_rust/.cargo/vendor/tiff/src/error.rs | 55
 libclamav_rust/.cargo/vendor/toml/.cargo-checksum.json | 2
 libclamav_rust/.cargo/vendor/toml/Cargo.lock | 61
 libclamav_rust/.cargo/vendor/toml/Cargo.toml | 59
 libclamav_rust/.cargo/vendor/toml/README.md | 21
 libclamav_rust/.cargo/vendor/toml/src/datetime.rs | 10
 libclamav_rust/.cargo/vendor/toml/src/de.rs | 94
 libclamav_rust/.cargo/vendor/toml/src/lib.rs | 8
 libclamav_rust/.cargo/vendor/toml/src/macros.rs | 37
 libclamav_rust/.cargo/vendor/toml/src/ser.rs | 64
 libclamav_rust/.cargo/vendor/toml/src/spanned.rs | 16
 libclamav_rust/.cargo/vendor/toml/src/tokens.rs | 20
 libclamav_rust/.cargo/vendor/typenum/.cargo-checksum.json | 2
 libclamav_rust/.cargo/vendor/typenum/CHANGELOG.md | 4
 libclamav_rust/.cargo/vendor/typenum/Cargo.toml | 16
 libclamav_rust/.cargo/vendor/typenum/build/generic_const_mappings.rs | 91
 libclamav_rust/.cargo/vendor/typenum/build/main.rs | 27
 libclamav_rust/.cargo/vendor/typenum/build/op.rs | 1
 libclamav_rust/.cargo/vendor/typenum/src/int.rs | 48
 libclamav_rust/.cargo/vendor/typenum/src/lib.rs | 22
 libclamav_rust/.cargo/vendor/typenum/src/type_operators.rs | 2
 libclamav_rust/.cargo/vendor/typenum/src/uint.rs | 63
 libclamav_rust/.cargo/vendor/unicode-ident/.cargo-checksum.json | 2
 libclamav_rust/.cargo/vendor/unicode-ident/Cargo.toml | 2
 libclamav_rust/.cargo/vendor/unicode-ident/README.md | 2
 libclamav_rust/.cargo/vendor/unicode-ident/src/tables.rs | 2
 libclamav_rust/.cargo/vendor/unicode-ident/tests/static_size.rs | 9
 libclamav_rust/.cargo/vendor/unicode-ident/tests/tables/mod.rs | 7
 libclamav_rust/.cargo/vendor/unicode-ident/tests/tables/tables.rs | 347
 libclamav_rust/.cargo/vendor/unicode-segmentation/.cargo-checksum.json | 2
 libclamav_rust/.cargo/vendor/unicode-segmentation/Cargo.toml | 2
 libclamav_rust/.cargo/vendor/unicode-segmentation/README.md | 2
 libclamav_rust/.cargo/vendor/unicode-segmentation/benches/unicode_words.rs | 55
 libclamav_rust/.cargo/vendor/unicode-segmentation/benches/word_bounds.rs | 55
 libclamav_rust/.cargo/vendor/unicode-segmentation/scripts/unicode.py | 62
 libclamav_rust/.cargo/vendor/unicode-segmentation/src/tables.rs | 346
 libclamav_rust/.cargo/vendor/wasm-bindgen-backend/.cargo-checksum.json | 2
 libclamav_rust/.cargo/vendor/wasm-bindgen-backend/Cargo.toml | 4
 libclamav_rust/.cargo/vendor/wasm-bindgen-backend/src/ast.rs | 19
 libclamav_rust/.cargo/vendor/wasm-bindgen-backend/src/codegen.rs | 183
 libclamav_rust/.cargo/vendor/wasm-bindgen-backend/src/encode.rs | 17
 libclamav_rust/.cargo/vendor/wasm-bindgen-macro-support/.cargo-checksum.json | 2
 libclamav_rust/.cargo/vendor/wasm-bindgen-macro-support/Cargo.toml | 8
 libclamav_rust/.cargo/vendor/wasm-bindgen-macro-support/src/lib.rs | 12
 libclamav_rust/.cargo/vendor/wasm-bindgen-macro-support/src/parser.rs | 114
 libclamav_rust/.cargo/vendor/wasm-bindgen-macro/.cargo-checksum.json | 2
 libclamav_rust/.cargo/vendor/wasm-bindgen-macro/Cargo.toml | 11
 libclamav_rust/.cargo/vendor/wasm-bindgen-macro/src/lib.rs | 29
 libclamav_rust/.cargo/vendor/wasm-bindgen-macro/src/worker.js | 1
 libclamav_rust/.cargo/vendor/wasm-bindgen-macro/ui-tests/async-errors.stderr | 114
 libclamav_rust/.cargo/vendor/wasm-bindgen-macro/ui-tests/link-to.rs | 32
 libclamav_rust/.cargo/vendor/wasm-bindgen-macro/ui-tests/link-to.stderr | 31
 libclamav_rust/.cargo/vendor/wasm-bindgen-macro/ui-tests/missing-catch.stderr | 30
 libclamav_rust/.cargo/vendor/wasm-bindgen-macro/ui-tests/start-function.rs | 20
 libclamav_rust/.cargo/vendor/wasm-bindgen-macro/ui-tests/start-function.stderr | 12
 libclamav_rust/.cargo/vendor/wasm-bindgen-macro/ui-tests/traits-not-implemented.stderr | 32
 libclamav_rust/.cargo/vendor/wasm-bindgen-shared/.cargo-checksum.json | 2
 libclamav_rust/.cargo/vendor/wasm-bindgen-shared/Cargo.toml | 2
 libclamav_rust/.cargo/vendor/wasm-bindgen-shared/src/lib.rs | 8
 libclamav_rust/.cargo/vendor/wasm-bindgen-shared/src/schema_hash_approval.rs | 2
 libclamav_rust/.cargo/vendor/wasm-bindgen/.cargo-checksum.json | 2
 libclamav_rust/.cargo/vendor/wasm-bindgen/Cargo.toml | 10
 libclamav_rust/.cargo/vendor/wasm-bindgen/guide/src/SUMMARY.md | 1
 libclamav_rust/.cargo/vendor/wasm-bindgen/guide/src/contributing/design/exporting-rust-struct.md | 2
 libclamav_rust/.cargo/vendor/wasm-bindgen/guide/src/reference/attributes/on-rust-exports/start.md | 2
 libclamav_rust/.cargo/vendor/wasm-bindgen/guide/src/reference/attributes/on-rust-exports/typescript_type.md | 3
 libclamav_rust/.cargo/vendor/wasm-bindgen/guide/src/reference/cli.md | 25
 libclamav_rust/.cargo/vendor/wasm-bindgen/guide/src/reference/static-js-objects.md | 64
 libclamav_rust/.cargo/vendor/wasm-bindgen/src/closure.rs | 8
 libclamav_rust/.cargo/vendor/wasm-bindgen/src/convert/closures.rs | 8
 libclamav_rust/.cargo/vendor/wasm-bindgen/src/convert/impls.rs | 12
 libclamav_rust/.cargo/vendor/wasm-bindgen/src/convert/slices.rs | 84
 libclamav_rust/.cargo/vendor/wasm-bindgen/src/convert/traits.rs | 27
 libclamav_rust/.cargo/vendor/wasm-bindgen/src/describe.rs | 1
 libclamav_rust/.cargo/vendor/wasm-bindgen/src/lib.rs | 52
 libclamav_rust/.cargo/vendor/wasm-bindgen/tests/wasm/api.rs | 1
 libclamav_rust/.cargo/vendor/wasm-bindgen/tests/wasm/classes.js | 4
 libclamav_rust/.cargo/vendor/wasm-bindgen/tests/wasm/closures.js | 2
 libclamav_rust/.cargo/vendor/wasm-bindgen/tests/wasm/closures.rs | 1
 libclamav_rust/.cargo/vendor/wasm-bindgen/tests/wasm/futures.js | 4
 libclamav_rust/.cargo/vendor/wasm-bindgen/tests/wasm/futures.rs | 10
 libclamav_rust/.cargo/vendor/wasm-bindgen/tests/wasm/import_class.js | 20
 libclamav_rust/.cargo/vendor/wasm-bindgen/tests/wasm/import_class.rs | 21
 libclamav_rust/.cargo/vendor/wasm-bindgen/tests/wasm/imports.js | 5
 libclamav_rust/.cargo/vendor/wasm-bindgen/tests/wasm/imports.rs | 12
 libclamav_rust/.cargo/vendor/wasm-bindgen/tests/wasm/intrinsics.rs | 3
 libclamav_rust/.cargo/vendor/wasm-bindgen/tests/wasm/jscast.rs | 1
 libclamav_rust/.cargo/vendor/wasm-bindgen/tests/wasm/link_to.js | 4
 libclamav_rust/.cargo/vendor/wasm-bindgen/tests/wasm/link_to.rs | 30
 libclamav_rust/.cargo/vendor/wasm-bindgen/tests/wasm/linked_module.js | 1
 libclamav_rust/.cargo/vendor/wasm-bindgen/tests/wasm/main.rs | 4
 libclamav_rust/.cargo/vendor/wasm-bindgen/tests/wasm/owned.js | 13
 libclamav_rust/.cargo/vendor/wasm-bindgen/tests/wasm/owned.rs | 35
 libclamav_rust/.cargo/vendor/wasm-bindgen/tests/wasm/simple.rs | 2
 libclamav_rust/.cargo/vendor/which/.cargo-checksum.json | 2
 libclamav_rust/.cargo/vendor/which/Cargo.toml | 2
 libclamav_rust/.cargo/vendor/which/src/lib.rs | 225
 libclamav_rust/.cargo/vendor/zune-inflate/.cargo-checksum.json | 1
 libclamav_rust/.cargo/vendor/zune-inflate/CHANGELOG.md | 29
 libclamav_rust/.cargo/vendor/zune-inflate/Cargo.toml | 55
 libclamav_rust/.cargo/vendor/zune-inflate/README.md | 120
 libclamav_rust/.cargo/vendor/zune-inflate/benches/decode.rs | 161
 libclamav_rust/.cargo/vendor/zune-inflate/src/bitstream.rs | 187
 libclamav_rust/.cargo/vendor/zune-inflate/src/constants.rs | 156
 libclamav_rust/.cargo/vendor/zune-inflate/src/crc.rs | 35
 libclamav_rust/.cargo/vendor/zune-inflate/src/crc/crc_tables.rs | 296
 libclamav_rust/.cargo/vendor/zune-inflate/src/decoder.rs | 1795 +++
 libclamav_rust/.cargo/vendor/zune-inflate/src/errors.rs | 106
 libclamav_rust/.cargo/vendor/zune-inflate/src/gzip_constants.rs | 13
 libclamav_rust/.cargo/vendor/zune-inflate/src/lib.rs | 91
 libclamav_rust/.cargo/vendor/zune-inflate/src/utils.rs | 123
 libclamav_rust/Cargo.toml | 2
 libclamav_rust/build.rs | 6
 libclamav_rust/src/cdiff.rs | 114
 libclamav_rust/src/evidence.rs | 6
 libclamav_rust/src/fuzzy_hash.rs | 2
 libclamav_rust/src/logging.rs | 6
 libclamav_rust/src/sys.rs | 332
 libclamunrar/CMakeLists.txt | 10
 libclamunrar/UnRARDll.vcxproj | 12
 libclamunrar/archive.cpp | 12
 libclamunrar/archive.hpp | 11
 libclamunrar/arcread.cpp | 136
 libclamunrar/array.hpp | 34
 libclamunrar/blake2s.hpp | 9
 libclamunrar/cmddata.cpp | 25
 libclamunrar/cmddata.hpp | 6
 libclamunrar/cmdfilter.cpp | 4
 libclamunrar/cmdmix.cpp | 7
 libclamunrar/compress.hpp | 1
 libclamunrar/crc.cpp | 161
 libclamunrar/crc.hpp | 4
 libclamunrar/crypt.cpp | 14
 libclamunrar/crypt.hpp | 72
 libclamunrar/crypt3.cpp | 5
 libclamunrar/crypt5.cpp | 12
 libclamunrar/dll.rc | 10
 libclamunrar/errhnd.cpp | 5
 libclamunrar/extinfo.cpp | 99
 libclamunrar/extinfo.hpp | 3
 libclamunrar/extract.cpp | 545
 libclamunrar/extract.hpp | 34
 libclamunrar/filcreat.cpp | 2
 libclamunrar/filcreat.hpp | 2
 libclamunrar/file.cpp | 67
 libclamunrar/file.hpp | 10
 libclamunrar/filefn.cpp | 4
 libclamunrar/filefn.hpp | 2
 libclamunrar/find.cpp | 18
 libclamunrar/getbits.cpp | 8
 libclamunrar/getbits.hpp | 14
 libclamunrar/hardlinks.cpp | 2
 libclamunrar/hash.cpp | 2
 libclamunrar/hash.hpp | 10
 libclamunrar/headers.cpp | 10
 libclamunrar/headers.hpp | 29
 libclamunrar/headers5.hpp | 7
 libclamunrar/isnt.cpp | 2
 libclamunrar/list.cpp | 11
 libclamunrar/loclang.hpp | 7
 libclamunrar/makefile | 11
 libclamunrar/model.cpp | 6
 libclamunrar/options.cpp | 8
 libclamunrar/options.hpp | 12
 libclamunrar/os.hpp | 13
 libclamunrar/pathfn.cpp | 93
 libclamunrar/pathfn.hpp | 1
 libclamunrar/qopen.cpp | 2
 libclamunrar/rar.hpp | 4
 libclamunrar/rardefs.hpp | 10
 libclamunrar/rawint.hpp | 6
 libclamunrar/rdwrfn.cpp | 4
 libclamunrar/recvol.cpp | 4
 libclamunrar/recvol.hpp | 22
 libclamunrar/recvol3.cpp | 17
 libclamunrar/recvol5.cpp | 8
 libclamunrar/rijndael.cpp | 98
 libclamunrar/rijndael.hpp | 10
 libclamunrar/scantree.cpp | 19
 libclamunrar/secpassword.cpp | 30
 libclamunrar/secpassword.hpp | 9
 libclamunrar/strfn.cpp | 26
 libclamunrar/strfn.hpp | 1
 libclamunrar/system.cpp | 6
 libclamunrar/threadmisc.cpp | 2
 libclamunrar/timefn.hpp | 11
 libclamunrar/ui.hpp | 37
 libclamunrar/uicommon.cpp | 2
 libclamunrar/uiconsole.cpp | 5
 libclamunrar/uisilent.cpp | 3
 libclamunrar/ulinks.cpp | 8
 libclamunrar/unicode.cpp | 7
 libclamunrar/unicode.hpp | 2
 libclamunrar/unpack.cpp | 2
 libclamunrar/unpack.hpp | 6
 libclamunrar/unpack30.cpp | 2
 libclamunrar/unpack50.cpp | 4
 libclamunrar/unpack50mt.cpp | 14
 libclamunrar/uowners.cpp | 48
 libclamunrar/version.hpp | 8
 libclamunrar/volume.cpp | 10
 libclamunrar/volume.hpp | 3
 libclamunrar/win32lnk.cpp | 2
 libclamunrar/win32stm.cpp | 16
 libclamunrar_iface/CMakeLists.txt | 9
 libfreshclam/dns.c | 6
 libfreshclam/libfreshclam.c | 7
 libfreshclam/libfreshclam_internal.c | 44
 sigtool/sigtool.c | 6
 unit_tests/clamscan/container_sigs_test.py | 4
 unit_tests/clamscan/fp_check_test.py | 4
 unit_tests/clamscan/heuristics_test.py | 2
 unit_tests/freshclam_test.py | 34
 unit_tests/sigtool_test.py | 78
 973 files changed, 128871 insertions(+), 73847 deletions(-)

diff -Nru libclamunrar-1.0.0/.gitattributes libclamunrar-1.0.3/.gitattributes
--- libclamunrar-1.0.0/.gitattributes 2022-11-23 18:54:47.000000000 +0000
+++ libclamunrar-1.0.3/.gitattributes 2023-08-25 21:18:34.000000000 +0000
@@ -9,6 +9,9 @@
 # Files that should be left untouched (binary is macro for -text -diff)
 *.ref binary

+# Preserve signature for .cargo/vendor files (from the tarball)
+/.cargo/vendor binary
+
 #
 # Exclude files from exporting
 #
diff -Nru libclamunrar-1.0.0/.gitignore libclamunrar-1.0.3/.gitignore
--- libclamunrar-1.0.0/.gitignore 2022-11-23 18:54:47.000000000 +0000
+++ libclamunrar-1.0.3/.gitignore 2023-08-25 21:18:34.000000000 +0000
@@ -228,9 +228,5 @@
 debug/
 target/

-# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
-# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
-Cargo.lock
-
 # These are backup files generated by rustfmt
 **/*.rs.bk
diff -Nru libclamunrar-1.0.0/CMakeLists.txt libclamunrar-1.0.3/CMakeLists.txt
--- libclamunrar-1.0.0/CMakeLists.txt 2022-11-23 18:54:47.000000000 +0000
+++ libclamunrar-1.0.3/CMakeLists.txt 2023-08-25 21:18:34.000000000 +0000
@@ -22,7 +22,7 @@
 set(VERSION_SUFFIX "")

 project( ClamAV
-    VERSION "1.0.0"
+    VERSION "1.0.3"
     DESCRIPTION "ClamAV open source email, web, and end-point anti-virus toolkit." )

 set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH})
diff -Nru libclamunrar-1.0.0/Cargo.lock libclamunrar-1.0.3/Cargo.lock
--- libclamunrar-1.0.0/Cargo.lock 2022-11-23 18:54:58.000000000 +0000
+++ libclamunrar-1.0.3/Cargo.lock 2023-08-25 21:18:34.000000000 +0000
@@ -9,15 +9,6 @@
 checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"

 [[package]]
-name = "aho-corasick"
-version = "0.7.20"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac"
-dependencies = [
- "memchr",
-]
-
-[[package]]
 name = "ansi_term"
 version = "0.12.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -32,7 +23,7 @@
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
 dependencies = [
- "hermit-abi",
+ "hermit-abi 0.1.19",
  "libc",
  "winapi",
 ]
@@ -45,24 +36,24 @@

 [[package]]
 name = "bindgen"
-version = "0.59.2"
+version = "0.65.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2bd2a9a458e8f4304c52c43ebb0cfbd520289f8379a52e329a38afda99bf8eb8"
+checksum = "cfdf7b466f9a4903edc73f95d6d2bcd5baf8ae620638762244d3f60143643cc5"
 dependencies = [
  "bitflags",
  "cexpr",
  "clang-sys",
- "clap",
- "env_logger",
  "lazy_static",
  "lazycell",
  "log",
  "peeking_take_while",
+ "prettyplease",
  "proc-macro2",
  "quote",
  "regex",
  "rustc-hash",
  "shlex",
+ "syn 2.0.15",
  "which",
 ]

@@ -89,15 +80,15 @@

 [[package]]
 name = "bumpalo"
-version = "3.11.1"
+version = "3.12.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "572f695136211188308f16ad2ca5c851a712c464060ae6974944458eb83880ba"
+checksum = "0d261e256854913907f67ed06efbc3338dfe6179796deefc1ff763fc1aee5535"

 [[package]]
 name = "bytemuck"
-version = "1.12.3"
+version = "1.13.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "aaa3a8d9a1ca92e282c96a32d6511b695d7d994d1d102ba85d279f9b2756947f"
+checksum = "c041d3eab048880cb0b86b256447da3f18859a163c3b8d8893f4e6368abe6393"

 [[package]]
 name = "byteorder"
@@ -119,7 +110,7 @@
  "quote",
  "serde",
  "serde_json",
- "syn",
+ "syn 1.0.107",
  "tempfile",
  "toml",
 ]
@@ -279,36 +270,24 @@

 [[package]]
 name = "either"
-version = "1.8.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "90e5c1c8368803113bf0c9584fc495a58b86dc8a29edbf8fe877d21d9507e797"
-
-[[package]]
-name = "env_logger"
-version = "0.9.3"
+version = "1.8.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a12e6657c4c97ebab115a42dcee77225f7f482cdd841cf7088c657a42e9e00e7"
-dependencies = [
- "atty",
- "humantime",
- "log",
- "regex",
- "termcolor",
-]
+checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91"

 [[package]]
 name = "exr"
-version = "1.5.2"
+version = "1.5.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8eb5f255b5980bb0c8cf676b675d1a99be40f316881444f44e0462eaf5df5ded"
+checksum = "e8af5ef47e2ed89d23d0ecbc1b681b30390069de70260937877514377fc24feb"
 dependencies = [
  "bit_field",
  "flume",
  "half",
  "lebe",
- "miniz_oxide 0.6.2",
+ "miniz_oxide",
  "smallvec",
  "threadpool",
+ "zune-inflate",
 ]

@@ -322,12 +301,12 @@

 [[package]]
 name = "flate2"
-version = "1.0.24"
+version = "1.0.25"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f82b0f4c27ad9f8bfd1f3208d882da2b09c301bc1c828fd3a00d0216d2fbbff6"
+checksum = "a8a2db397cb1c8772f31494cb8917e48cd1e64f0fa7efac59fbd741a0a8ce841"
 dependencies = [
  "crc32fast",
- "miniz_oxide 0.5.4",
+ "miniz_oxide",
 ]

@@ -345,15 +324,15 @@

 [[package]]
 name = "futures-core"
-version = "0.3.25"
+version = "0.3.26"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "04909a7a7e4633ae6c4a9ab280aeb86da1236243a77b694a49eacd659a4bd3ac"
+checksum = "ec90ff4d0fe1f57d600049061dc6bb68ed03c7d2fbd697274c41805dcb3f8608"

 [[package]]
 name = "futures-sink"
-version = "0.3.25"
+version = "0.3.26"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "39c15cf1a4aa79df40f1bb462fb39676d0ad9e366c2a33b590d7c66f4f81fcf9"
+checksum = "f310820bb3e8cfd46c80db4d7fb8353e15dfff853a127158425f31e0be6c8364"

 [[package]]
 name = "generic-array"
@@ -390,15 +369,15 @@

 [[package]]
 name = "glob"
-version = "0.3.0"
+version = "0.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
+checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"

 [[package]]
 name = "half"
-version = "2.1.0"
+version = "2.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ad6a9459c9c30b177b925162351f97e7d967c7ea8bab3b8352805327daf45554"
+checksum = "02b4af3693f1b705df946e9fe5631932443781d0aabb423b62fcd4d73f6d2fd0"
 dependencies = [
  "crunchy",
 ]
@@ -428,16 +407,19 @@
 ]

 [[package]]
-name = "hex"
-version = "0.4.3"
+name = "hermit-abi"
+version = "0.2.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
+checksum = "ee512640fe35acbfb4bb779db6f0d80704c2cacfa2e39b601ef3e3f47d1ae4c7"
+dependencies = [
+ "libc",
+]

 [[package]]
-name = "humantime"
-version = "2.1.0"
+name = "hex"
+version = "0.4.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
+checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"

 [[package]]
 name = "image"
@@ -479,9 +461,9 @@

 [[package]]
 name = "itoa"
-version = "1.0.4"
+version = "1.0.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4217ad341ebadf8d8e724e264f13e593e0648f5b3e94b3896a5df283be015ecc"
+checksum = "fad582f4b9e86b6caa621cabeb0963332d92eea04729ab12892c2533951e6440"

 [[package]]
 name = "jpeg-decoder"
@@ -494,9 +476,9 @@

 [[package]]
 name = "js-sys"
-version = "0.3.60"
+version = "0.3.61"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "49409df3e3bf0856b916e2ceaca09ee28e6871cf7d9ce97a692cacfdb2a25a47"
+checksum = "445dde2150c55e483f3d8416706b97ec8e8237c307e5b7b4b8dd15e6af2a0730"
 dependencies = [
  "wasm-bindgen",
 ]
@@ -521,9 +503,9 @@

 [[package]]
 name = "libc"
-version = "0.2.137"
+version = "0.2.139"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fc7fcc620a3bff7cdd7a365be3376c97191aeaccc2a603e600951e452615bf89"
+checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79"

 [[package]]
 name = "libloading"
@@ -577,15 +559,6 @@

 [[package]]
 name = "miniz_oxide"
-version = "0.5.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "96590ba8f175222643a85693f33d26e9c8a015f599c216509b1a6894af675d34"
-dependencies = [
- "adler",
-]
-
-[[package]]
-name = "miniz_oxide"
 version = "0.6.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b275950c28b37e794e8c55d88aeb5e139d0ce23fdbbeda68f8d7174abdf9e8fa"
@@ -604,9 +577,9 @@
 [[package]]
 name = "nom"
-version = "7.1.1"
+version = "7.1.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a8903e5a29a317527874d0402f867152a3d21c908bb0b933e416c65e301d4c36"
+checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
 dependencies = [
  "memchr",
  "minimal-lexical",
 ]
@@ -614,9 +587,9 @@

 [[package]]
 name = "num-complex"
-version = "0.4.2"
+version = "0.4.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7ae39348c8bc5fbd7f40c727a9925f03517afd2ab27d46702108b6a7e5414c19"
+checksum = "02e0d21255c828d6f128a1e41534206671e8c3ea0c62f32291e808dc82cff17d"
 dependencies = [
  "num-traits",
 ]
@@ -653,19 +626,19 @@

 [[package]]
 name = "num_cpus"
-version = "1.14.0"
+version = "1.15.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f6058e64324c71e02bc2b150e4f3bc8286db6c83092132ffa3f6b1eab0f9def5"
+checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b"
 dependencies = [
- "hermit-abi",
+ "hermit-abi 0.2.6",
  "libc",
 ]

 [[package]]
 name = "once_cell"
-version = "1.16.0"
+version = "1.17.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "86f0b0d4bf799edbc74508c1e8bf170ff5f41238e5f8225603ca7caaae2b7860"
+checksum = "6f61fba1741ea2b3d6a1e3178721804bb716a68a6aeba1149b5d52e3d464ea66"

 [[package]]
 name = "peeking_take_while"
@@ -690,7 +663,7 @@
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 1.0.107",
 ]

 [[package]]
@@ -702,7 +675,17 @@
  "bitflags",
  "crc32fast",
  "flate2",
- "miniz_oxide 0.6.2",
+ "miniz_oxide",
+]
+
+[[package]]
+name = "prettyplease"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ceca8aaf45b5c46ec7ed39fff75f57290368c1846d33d24a122ca81416ab058"
+dependencies = [
+ "proc-macro2",
+ "syn 2.0.15",
 ]

 [[package]]
@@ -716,38 +699,37 @@

 [[package]]
 name = "proc-macro2"
-version = "1.0.47"
+version = "1.0.66"
 source
= "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ea3d908b0e36316caf9e9e2c4625cdde190a7e6f440d794667ed17a1855e725" +checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.21" +version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179" +checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc" dependencies = [ "proc-macro2", ] [[package]] name = "rayon" -version = "1.6.0" +version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e060280438193c554f654141c9ea9417886713b7acd75974c85b18a69a88e0b" +checksum = "6db3a213adf02b3bcfd2d3846bb41cb22857d131789e01df434fb7e7bc0759b7" dependencies = [ - "crossbeam-deque", "either", "rayon-core", ] [[package]] name = "rayon-core" -version = "1.10.1" +version = "1.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cac410af5d00ab6884528b4ab69d1e8e146e8d471201800fa1b4524126de6ad3" +checksum = "356a0625f1954f730c0201cdab48611198dc6ce21f4acff55089b5a78e6e835b" dependencies = [ "crossbeam-channel", "crossbeam-deque", @@ -766,12 +748,10 @@ [[package]] name = "regex" -version = "1.7.0" +version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e076559ef8e241f2ae3479e36f97bd5741c0330689e217ad51ce2c76808b868a" +checksum = "48aaa5748ba571fb95cd2c85c09f629215d3a6ece942baa100950af03a34f733" dependencies = [ - "aho-corasick", - "memchr", "regex-syntax", ] @@ -822,9 +802,9 @@ [[package]] name = "ryu" -version = "1.0.11" +version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09" +checksum = "7b4b9743ed687d4b4bcedf9ff5eaa7398495ae14e61cba0a295704edbc7decde" [[package]] name = "scoped_threadpool" @@ -840,29 +820,29 @@ [[package]] name = "serde" -version = "1.0.147" +version = "1.0.152" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d193d69bae983fc11a79df82342761dfbf28a99fc8d203dca4c3c1b590948965" +checksum = "bb7d1f0d3021d347a83e556fc4683dea2ea09d87bccdf88ff5c12545d89d5efb" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.147" +version = "1.0.152" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f1d362ca8fc9c3e3a7484440752472d68a6caa98f1ab81d99b5dfe517cec852" +checksum = "af487d118eecd09402d70a5d72551860e788df87b464af30e5ea6a38c75c541e" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.107", ] [[package]] name = "serde_json" -version = "1.0.89" +version = "1.0.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "020ff22c755c2ed3f8cf162dbb41a7268d934702f3ed3631656ea597e08fc3db" +checksum = "cad406b69c91885b5107daf2c29572f6c8cdb3c66826821e286c533490c0bc76" dependencies = [ "itoa", "ryu", @@ -887,6 +867,12 @@ checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3" [[package]] +name = "simd-adler32" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14a5df39617d7c8558154693a1bb8157a4aab8179209540cc0b10e5dc24e0b18" + +[[package]] name = "smallvec" version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -894,9 +880,9 @@ [[package]] name = "spin" -version = "0.9.4" +version = "0.9.5" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f6002a767bff9e83f8eeecf883ecb8011875a21ae8da43bffb817a57e78cc09" +checksum = "7dccf47db1b41fa1573ed27ccf5e08e3ca771cb994f776668c5ebda893b248fc" dependencies = [ "lock_api", ] @@ -915,9 +901,20 @@ [[package]] name = "syn" -version = "1.0.103" +version = "1.0.107" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a864042229133ada95abf3b54fdc62ef5ccabe9515b64717bcb9a1919e59445d" +checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a34fcf3e8b60f57e6a14301a2e916d323af98b0ea63c599441eec8558660c822" dependencies = [ "proc-macro2", "quote", @@ -939,15 +936,6 @@ ] [[package]] -name = "termcolor" -version = "1.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" -dependencies = [ - "winapi-util", -] - -[[package]] name = "textwrap" version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -958,22 +946,22 @@ [[package]] name = "thiserror" -version = "1.0.37" +version = "1.0.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10deb33631e3c9018b9baf9dcbbc4f737320d2b576bac10f6aefa048fa407e3e" +checksum = "6a9cd18aa97d5c45c6603caea1da6628790b37f7a34b6ca89522331c5180fed0" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.37" +version = "1.0.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "982d17546b47146b28f7c22e3d08465f6b8903d0ea13c1660d9d84a6e7adcdbb" +checksum = "1fb327af4685e4d03fa8cbcf1716380da910eeb2bb8be417e7f9fd3fb164f36f" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.107", ] [[package]] @@ -987,9 +975,9 @@ [[package]] name = "tiff" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f17def29300a156c19ae30814710d9c63cd50288a49c6fd3a10ccfbe4cf886fd" +checksum = "7449334f9ff2baf290d55d73983a7d6fa15e01198faef72af07e2a8db851e471" dependencies = [ "flate2", "jpeg-decoder", @@ -998,9 +986,9 @@ [[package]] name = "toml" -version = "0.5.9" +version = "0.5.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d82e1a7758622a465f8cee077614c73484dac5b836c02ff6a40d5d1010324d7" +checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" dependencies = [ "serde", ] @@ -1017,21 +1005,21 @@ [[package]] name = "typenum" -version = "1.15.0" +version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987" +checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" [[package]] name = "unicode-ident" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ceab39d59e4c9499d4e5a8ee0e2735b891bb7308ac83dfb4e80cad195c9f6f3" +checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc" [[package]] name = "unicode-segmentation" -version = "1.10.0" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fdbf052a0783de01e944a6ce7a8cb939e295b1e7be835a1112c3b9a7f047a5a" +checksum = 
"1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" [[package]] name = "unicode-width" @@ -1059,9 +1047,9 @@ [[package]] name = "wasm-bindgen" -version = "0.2.83" +version = "0.2.84" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eaf9f5aceeec8be17c128b2e93e031fb8a4d469bb9c4ae2d7dc1888b26887268" +checksum = "31f8dcbc21f30d9b8f2ea926ecb58f6b91192c17e9d33594b3df58b2007ca53b" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -1069,24 +1057,24 @@ [[package]] name = "wasm-bindgen-backend" -version = "0.2.83" +version = "0.2.84" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c8ffb332579b0557b52d268b91feab8df3615f265d5270fec2a8c95b17c1142" +checksum = "95ce90fd5bcc06af55a641a86428ee4229e44e07033963a2290a8e241607ccb9" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", - "syn", + "syn 1.0.107", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.83" +version = "0.2.84" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "052be0f94026e6cbc75cdefc9bae13fd6052cdcaf532fa6c45e7ae33a1e6c810" +checksum = "4c21f77c0bedc37fd5dc21f897894a5ca01e7bb159884559461862ae90c0b4c5" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -1094,22 +1082,22 @@ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.83" +version = "0.2.84" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07bc0c051dc5f23e307b13285f9d75df86bfdf816c5721e573dec1f9b8aa193c" +checksum = "2aff81306fcac3c7515ad4e177f521b5c9a15f2b08f4e32d823066102f35a5f6" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.107", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.83" +version = "0.2.84" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c38c045535d93ec4f0b4defec448e4291638ee608530863b1e2ba115d4fff7f" +checksum = "0046fef7e28c3804e5e38bfa31ea2a0f73905319b677e57ebe37e49358989b5d" [[package]] name = "weezl" @@ -1119,9 +1107,9 @@ [[package]] name = "which" -version = "4.3.0" +version = "4.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c831fbbee9e129a8cf93e7747a82da9d95ba8e16621cae60ec2cdc849bacb7b" +checksum = "2441c784c52b289a054b7201fc93253e288f094e2f4be9058343127c4226a269" dependencies = [ "either", "libc", @@ -1145,16 +1133,16 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] -name = "winapi-util" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" -dependencies = [ - "winapi", -] - -[[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "zune-inflate" +version = "0.2.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c473377c11c4a3ac6a2758f944cd336678e9c977aa0abf54f6450cf77e902d6d" +dependencies = [ + "simd-adler32", +] diff -Nru libclamunrar-1.0.0/Jenkinsfile libclamunrar-1.0.3/Jenkinsfile --- libclamunrar-1.0.0/Jenkinsfile 2022-11-23 18:54:47.000000000 +0000 +++ libclamunrar-1.0.3/Jenkinsfile 2023-08-25 21:18:34.000000000 +0000 @@ -10,7 +10,7 @@ parameters( [ string(name: 'VERSION', - defaultValue: '1.0.0', + defaultValue: '1.0.3', description: 'ClamAV version 
string'), string(name: 'FRAMEWORK_BRANCH', defaultValue: '1.0', @@ -37,7 +37,7 @@ defaultValue: 'fuzz-regression-1.0', description: 'test-pipelines branch for fuzz regression tests'), string(name: 'FUZZ_CORPUS_BRANCH', - defaultValue: 'master', + defaultValue: '1.0', description: 'private-fuzz-corpus branch'), string(name: 'APPCHECK_PIPELINE', defaultValue: 'appcheck-1.0', diff -Nru libclamunrar-1.0.0/NEWS.md libclamunrar-1.0.3/NEWS.md --- libclamunrar-1.0.0/NEWS.md 2022-11-23 18:54:47.000000000 +0000 +++ libclamunrar-1.0.3/NEWS.md 2023-08-25 21:18:34.000000000 +0000 @@ -3,6 +3,97 @@ Note: This file refers to the official packages. Things described here may differ slightly from third-party binary packages. +## 1.0.3 + +ClamAV 1.0.3 is a critical patch release with the following fixes: + +- Upgrade the bundled UnRAR library (libclamunrar) to version 6.2.10. + - GitHub pull request: https://github.com/Cisco-Talos/clamav/pull/1010 + +## 1.0.2 + +ClamAV 1.0.2 is a critical patch release with the following fixes: + +- [CVE-2023-20197](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-20197) + Fixed a possible denial of service vulnerability in the HFS+ file parser. + This issue affects versions 1.1.0, 1.0.1 through 1.0.0, 0.105.2 through 0.105.0, + 0.104.4 through 0.104.0, and 0.103.8 through 0.103.0. + Thank you to Steve Smith for reporting this issue. + +- [CVE-2023-20212](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-20212) + Fixed a possible denial of service vulnerability in the AutoIt file parser. + This issue affects versions 1.0.1 and 1.0.0. + This issue does not affect version 1.1.0. + +- Fixed a build issue when using the Rust nightly toolchain, which was + affecting the oss-fuzz build environment used for regression tests. + - GitHub pull request: https://github.com/Cisco-Talos/clamav/pull/996 + +- Fixed a build issue on Windows when using Rust version 1.70 or newer. + - GitHub pull request: https://github.com/Cisco-Talos/clamav/pull/993 + +- CMake build system improvement to support compiling with OpenSSL 3.x on + macOS with the Xcode toolchain. + + The official ClamAV installers and packages are now built with OpenSSL 3.1.1 + or newer. + - GitHub pull request: https://github.com/Cisco-Talos/clamav/pull/973 + +- Fixed an issue where ClamAV does not abort the signature load process after + partially loading an invalid signature. + The bug would later cause a crash when scanning certain files. + - GitHub pull request: https://github.com/Cisco-Talos/clamav/pull/952 + +- Fixed an issue so that ClamAV correctly removes temporary files generated + by the VBA and XLM extraction modules so that the files are not leaked in + patched versions of ClamAV where temporary files are written directly to the + temp-directory instead of writing to a unique subdirectory. + - GitHub pull request: https://github.com/Cisco-Talos/clamav/pull/900 + +- Set Git attributes to prevent Git from altering line endings for bundled Rust + libraries. Third-party Rust libraries are bundled in the ClamAV release + tarball. We do not commit them to our own Git repository, but community + package maintainers may now store the tarball contents in Git. + The Rust build system verifies the library manifest, and this change + ensures that the hashes are correct. + Improvement courtesy of Nicolas R. 
+ - GitHub pull request: https://github.com/Cisco-Talos/clamav/pull/856 + +- Fixed two bugs that would cause Freshclam to fail update when applying a + CDIFF database patch if that patch adds a file to the database archive + or removes a file from the database archive. + This bug also caused Sigtool to fail to create such a patch. + - GitHub pull request: https://github.com/Cisco-Talos/clamav/pull/901 + +Special thanks to the following people for code contributions and bug reports: +- Nicolas R. +- Steve Smith + +## 1.0.1 + +ClamAV 1.0.1 is a critical patch release with the following fixes: + +- [CVE-2023-20032](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-20032): + Fixed a possible remote code execution vulnerability in the HFS+ file parser. + Issue affects versions 1.0.0 and earlier, 0.105.1 and earlier, and 0.103.7 and + earlier. + Thank you to Simon Scannell for reporting this issue. + +- [CVE-2023-20052](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-20052): + Fixed a possible remote information leak vulnerability in the DMG file parser. + Issue affects versions 1.0.0 and earlier, 0.105.1 and earlier, and 0.103.7 and + earlier. + Thank you to Simon Scannell for reporting this issue. + +- Fix allmatch detection issue with the preclass bytecode hook. + - GitHub pull request: https://github.com/Cisco-Talos/clamav/pull/825 + +- Update vendored libmspack library to version 0.11alpha. + - GitHub pull request: https://github.com/Cisco-Talos/clamav/pull/828 + +Special thanks to the following people for code contributions and bug reports: +- Simon Scannell + ## 1.0.0 ClamAV 1.0.0 includes the following improvements and changes. @@ -416,24 +507,25 @@ The CVE's fixes below are also addressed in versions 0.104.3 and 0.103.6. -- [CVE-2022-20803](CVE-2022-20803): Fixed a possible double-free vulnerability - in the OLE2 file parser. +- [CVE-2022-20803](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-20803): + Fixed a possible double-free vulnerability in the OLE2 file parser. Issue affects versions 0.104.0 through 0.104.2. Issue identified by OSS-Fuzz. -- [CVE-2022-20770](CVE-2022-20770): Fixed a possible infinite loop vulnerability - in the CHM file parser. +- [CVE-2022-20770](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-20770): + Fixed a possible infinite loop vulnerability in the CHM file parser. Issue affects versions 0.104.0 through 0.104.2 and LTS version 0.103.5 and prior versions. Thank you to Michał Dardas for reporting this issue. -- [CVE-2022-20796](CVE-2022-20796): Fixed a possible NULL-pointer dereference - crash in the scan verdict cache check. +- [CVE-2022-20796](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-20796): + Fixed a possible NULL-pointer dereference crash in the scan verdict cache + check. Issue affects versions 0.103.4, 0.103.5, 0.104.1, and 0.104.2. Thank you to Alexander Patrakov and Antoine Gatineau for reporting this issue. -- [CVE-2022-20771](CVE-2022-20771): Fixed a possible infinite loop vulnerability - in the TIFF file parser. +- [CVE-2022-20771](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-20771): + Fixed a possible infinite loop vulnerability in the TIFF file parser. Issue affects versions 0.104.0 through 0.104.2 and LTS version 0.103.5 and prior versions. The issue only occurs if the "--alert-broken-media" ClamScan option is @@ -441,14 +533,15 @@ libclamav it is the "CL_SCAN_HEURISTIC_BROKEN_MEDIA" scan option. Thank you to Michał Dardas for reporting this issue. 
-- [CVE-2022-20785](CVE-2022-20785): Fixed a possible memory leak in the - HTML file parser / Javascript normalizer. +- [CVE-2022-20785](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-20785): + Fixed a possible memory leak in the HTML file parser / Javascript normalizer. Issue affects versions 0.104.0 through 0.104.2 and LTS version 0.103.5 and prior versions. Thank you to Michał Dardas for reporting this issue. -- [CVE-2022-20792](CVE-2022-20792): Fixed a possible multi-byte heap buffer - overflow write vulnerability in the signature database load module. +- [CVE-2022-20792](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-20792): + Fixed a possible multi-byte heap buffer overflow write vulnerability in the + signature database load module. The fix was to update the vendored regex library to the latest version. Issue affects versions 0.104.0 through 0.104.2 and LTS version 0.103.5 and prior versions. @@ -534,24 +627,25 @@ ClamAV 0.104.3 is a critical patch release with the following fixes: -- [CVE-2022-20803](CVE-2022-20803): Fixed a possible double-free vulnerability - in the OLE2 file parser. +- [CVE-2022-20803](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-20803): + Fixed a possible double-free vulnerability in the OLE2 file parser. Issue affects versions 0.104.0 through 0.104.2. Issue identified by OSS-Fuzz. -- [CVE-2022-20770](CVE-2022-20770): Fixed a possible infinite loop vulnerability - in the CHM file parser. +- [CVE-2022-20770](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-20770): + Fixed a possible infinite loop vulnerability in the CHM file parser. Issue affects versions 0.104.0 through 0.104.2 and LTS version 0.103.5 and prior versions. Thank you to Michał Dardas for reporting this issue. -- [CVE-2022-20796](CVE-2022-20796): Fixed a possible NULL-pointer dereference - crash in the scan verdict cache check. +- [CVE-2022-20796](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-20796): + Fixed a possible NULL-pointer dereference crash in the scan verdict cache + check. Issue affects versions 0.103.4, 0.103.5, 0.104.1, and 0.104.2. Thank you to Alexander Patrakov and Antoine Gatineau for reporting this issue. -- [CVE-2022-20771](CVE-2022-20771): Fixed a possible infinite loop vulnerability - in the TIFF file parser. +- [CVE-2022-20771](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-20771): + Fixed a possible infinite loop vulnerability in the TIFF file parser. Issue affects versions 0.104.0 through 0.104.2 and LTS version 0.103.5 and prior versions. The issue only occurs if the "--alert-broken-media" ClamScan option is @@ -559,14 +653,15 @@ libclamav it is the "CL_SCAN_HEURISTIC_BROKEN_MEDIA" scan option. Thank you to Michał Dardas for reporting this issue. -- [CVE-2022-20785](CVE-2022-20785): Fixed a possible memory leak in the - HTML file parser / Javascript normalizer. +- [CVE-2022-20785](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-20785): + Fixed a possible memory leak in the HTML file parser / Javascript normalizer. Issue affects versions 0.104.0 through 0.104.2 and LTS version 0.103.5 and prior versions. Thank you to Michał Dardas for reporting this issue. -- [CVE-2022-20792](CVE-2022-20792): Fixed a possible multi-byte heap buffer - overflow write vulnerability in the signature database load module. +- [CVE-2022-20792](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-20792): + Fixed a possible multi-byte heap buffer overflow write vulnerability in the + signature database load module. 
The fix was to update the vendored regex library to the latest version. Issue affects versions 0.104.0 through 0.104.2 and LTS version 0.103.5 and prior versions. @@ -906,19 +1001,20 @@ ClamAV 0.103.6 is a critical patch release with the following fixes: -- [CVE-2022-20770](CVE-2022-20770): Fixed a possible infinite loop vulnerability - in the CHM file parser. +- [CVE-2022-20770](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-20770): + Fixed a possible infinite loop vulnerability in the CHM file parser. Issue affects versions 0.104.0 through 0.104.2 and LTS version 0.103.5 and prior versions. Thank you to Michał Dardas for reporting this issue. -- [CVE-2022-20796](CVE-2022-20796): Fixed a possible NULL-pointer dereference - crash in the scan verdict cache check. +- [CVE-2022-20796](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-20796): + Fixed a possible NULL-pointer dereference crash in the scan verdict cache + check. Issue affects versions 0.103.4, 0.103.5, 0.104.1, and 0.104.2. Thank you to Alexander Patrakov and Antoine Gatineau for reporting this issue. -- [CVE-2022-20771](CVE-2022-20771): Fixed a possible infinite loop vulnerability - in the TIFF file parser. +- [CVE-2022-20771](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-20771): + Fixed a possible infinite loop vulnerability in the TIFF file parser. Issue affects versions 0.104.0 through 0.104.2 and LTS version 0.103.5 and prior versions. The issue only occurs if the "--alert-broken-media" ClamScan option is @@ -926,14 +1022,15 @@ libclamav it is the "CL_SCAN_HEURISTIC_BROKEN_MEDIA" scan option. Thank you to Michał Dardas for reporting this issue. -- [CVE-2022-20785](CVE-2022-20785): Fixed a possible memory leak in the - HTML file parser / Javascript normalizer. +- [CVE-2022-20785](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-20785): + Fixed a possible memory leak in the HTML file parser / Javascript normalizer. Issue affects versions 0.104.0 through 0.104.2 and LTS version 0.103.5 and prior versions. Thank you to Michał Dardas for reporting this issue. -- [CVE-2022-20792](CVE-2022-20792): Fixed a possible multi-byte heap buffer - overflow write vulnerability in the signature database load module. +- [CVE-2022-20792](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-20792): + Fixed a possible multi-byte heap buffer overflow write vulnerability in the + signature database load module. The fix was to update the vendored regex library to the latest version. Issue affects versions 0.104.0 through 0.104.2 and LTS version 0.103.5 and prior versions. 
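A note on the scan option named repeatedly in the NEWS entries above: the TIFF infinite-loop issue (CVE-2022-20771) is only reachable when broken-media heuristics are enabled, via "--alert-broken-media" on the clamscan command line or the "CL_SCAN_HEURISTIC_BROKEN_MEDIA" flag in libclamav. The following is a minimal sketch of how a libclamav client sets that flag. It assumes a libclamav version that provides the cl_scan_options scanning API and defines CL_SCAN_HEURISTIC_BROKEN_MEDIA, uses the default database directory, and abbreviates error handling; it is an illustration for readers of this changelog, not part of the patch set.

    /* Sketch: enabling the broken-media heuristic from a libclamav client.
     * Assumes the cl_scan_options API and a libclamav that defines
     * CL_SCAN_HEURISTIC_BROKEN_MEDIA; error handling is abbreviated. */
    #include <stdio.h>
    #include <clamav.h>

    int main(int argc, char **argv)
    {
        const char *virname = NULL;
        unsigned long scanned = 0;
        unsigned int sigs = 0;
        struct cl_scan_options options = {0};
        struct cl_engine *engine;

        if (argc != 2 || cl_init(CL_INIT_DEFAULT) != CL_SUCCESS)
            return 1;
        if (!(engine = cl_engine_new()))
            return 1;

        /* Load signatures from the default database directory and compile. */
        if (cl_load(cl_retdbdir(), engine, &sigs, CL_DB_STDOPT) != CL_SUCCESS ||
            cl_engine_compile(engine) != CL_SUCCESS) {
            cl_engine_free(engine);
            return 1;
        }

        /* libclamav equivalent of clamscan's --alert-broken-media option. */
        options.heuristic |= CL_SCAN_HEURISTIC_BROKEN_MEDIA;

        if (cl_scanfile(argv[1], &virname, &scanned, engine, &options) == CL_VIRUS)
            printf("%s FOUND\n", virname);

        cl_engine_free(engine);
        return 0;
    }

Without the heuristic flag set, the vulnerable TIFF code path described above is not triggered, which is why the advisories single the option out.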
diff -Nru libclamunrar-1.0.0/clamsubmit/CMakeLists.txt libclamunrar-1.0.3/clamsubmit/CMakeLists.txt --- libclamunrar-1.0.0/clamsubmit/CMakeLists.txt 2022-11-23 18:54:47.000000000 +0000 +++ libclamunrar-1.0.3/clamsubmit/CMakeLists.txt 2023-08-25 21:18:34.000000000 +0000 @@ -34,6 +34,8 @@ PRIVATE ClamAV::libclamav ClamAV::common + OpenSSL::SSL + OpenSSL::Crypto JSONC::jsonc CURL::libcurl ) if(APPLE) diff -Nru libclamunrar-1.0.0/cmake/FindRust.cmake libclamunrar-1.0.3/cmake/FindRust.cmake --- libclamunrar-1.0.0/cmake/FindRust.cmake 2022-11-23 18:54:47.000000000 +0000 +++ libclamunrar-1.0.3/cmake/FindRust.cmake 2023-08-25 21:18:34.000000000 +0000 @@ -294,6 +294,21 @@ WORKING_DIRECTORY "${ARGS_SOURCE_DIRECTORY}" DEPENDS ${LIB_SOURCES} COMMENT "Building ${ARGS_TARGET} in ${ARGS_BINARY_DIRECTORY} with: ${cargo_EXECUTABLE} ${MY_CARGO_ARGS_STRING}") + elseif("${CMAKE_OSX_ARCHITECTURES}" MATCHES "^(arm64)$") + add_custom_command( + OUTPUT "${OUTPUT}" + COMMAND ${CMAKE_COMMAND} -E env "CARGO_CMD=build" "CARGO_TARGET_DIR=${ARGS_BINARY_DIRECTORY}" "MAINTAINER_MODE=${MAINTAINER_MODE}" "RUSTFLAGS=${RUSTFLAGS}" ${cargo_EXECUTABLE} ${MY_CARGO_ARGS} --target=aarch64-apple-darwin + WORKING_DIRECTORY "${ARGS_SOURCE_DIRECTORY}" + DEPENDS ${LIB_SOURCES} + COMMENT "Building ${ARGS_TARGET} in ${ARGS_BINARY_DIRECTORY} with: ${cargo_EXECUTABLE} ${MY_CARGO_ARGS_STRING}") + elseif("${CMAKE_OSX_ARCHITECTURES}" MATCHES "^(x86_64)$") + add_custom_command( + OUTPUT "${OUTPUT}" + COMMAND ${CMAKE_COMMAND} -E env "CARGO_CMD=build" "CARGO_TARGET_DIR=${ARGS_BINARY_DIRECTORY}" "MAINTAINER_MODE=${MAINTAINER_MODE}" "RUSTFLAGS=${RUSTFLAGS}" ${cargo_EXECUTABLE} ${MY_CARGO_ARGS} --target=x86_64-apple-darwin + COMMAND ${CMAKE_COMMAND} -E make_directory "${ARGS_BINARY_DIRECTORY}/${RUST_COMPILER_TARGET}/${CARGO_BUILD_TYPE}" + WORKING_DIRECTORY "${ARGS_SOURCE_DIRECTORY}" + DEPENDS ${LIB_SOURCES} + COMMENT "Building ${ARGS_TARGET} in ${ARGS_BINARY_DIRECTORY} with: ${cargo_EXECUTABLE} ${MY_CARGO_ARGS_STRING}") else() add_custom_command( OUTPUT "${OUTPUT}" @@ -382,10 +397,17 @@ ${rustc_VERSION} < ${RUSTC_MINIMUM_REQUIRED}") endif() +if(WIN32) + file(TOUCH ${CMAKE_BINARY_DIR}/empty-file) + set(EMPTY_FILE "${CMAKE_BINARY_DIR}/empty-file") +else() + set(EMPTY_FILE "/dev/null") +endif() + # Determine the native libs required to link w/ rust static libs -# message(STATUS "Detecting native static libs for rust: ${rustc_EXECUTABLE} --crate-type staticlib --print=native-static-libs /dev/null") +# message(STATUS "Detecting native static libs for rust: ${rustc_EXECUTABLE} --crate-type staticlib --print=native-static-libs ${EMPTY_FILE}") execute_process( - COMMAND ${CMAKE_COMMAND} -E env "CARGO_TARGET_DIR=${CMAKE_BINARY_DIR}" ${rustc_EXECUTABLE} --crate-type staticlib --print=native-static-libs /dev/null + COMMAND ${CMAKE_COMMAND} -E env "CARGO_TARGET_DIR=${CMAKE_BINARY_DIR}" ${rustc_EXECUTABLE} --crate-type staticlib --print=native-static-libs ${EMPTY_FILE} OUTPUT_VARIABLE RUST_NATIVE_STATIC_LIBS_OUTPUT ERROR_VARIABLE RUST_NATIVE_STATIC_LIBS_ERROR RESULT_VARIABLE RUST_NATIVE_STATIC_LIBS_RESULT diff -Nru libclamunrar-1.0.0/debian/.git-dpm libclamunrar-1.0.3/debian/.git-dpm --- libclamunrar-1.0.0/debian/.git-dpm 2023-01-11 22:19:42.000000000 +0000 +++ libclamunrar-1.0.3/debian/.git-dpm 2023-09-07 16:41:30.000000000 +0000 @@ -1,8 +1,8 @@ # see git-dpm(1) from git-dpm package -26b772202d0845fe5ba6fadf66664a39918cb45d -26b772202d0845fe5ba6fadf66664a39918cb45d -376223ea637d9bb428a41050b14dc2179690c3f4 -376223ea637d9bb428a41050b14dc2179690c3f4 
-libclamunrar_1.0.0.orig.tar.xz -17bb563e55b33844f82dcabf756fb7f20f6666cb -14291676 +33235611ffb90774de0dfbe3e8a43c8e962e0a8e +33235611ffb90774de0dfbe3e8a43c8e962e0a8e +71feb80a23a1e33ccefefca9a8e3ea883c22760f +71feb80a23a1e33ccefefca9a8e3ea883c22760f +libclamunrar_1.0.3.orig.tar.xz +6ef4d34555ea84e322a9b9e8c43f95761393e4da +14390520 diff -Nru libclamunrar-1.0.0/debian/changelog libclamunrar-1.0.3/debian/changelog --- libclamunrar-1.0.0/debian/changelog 2023-01-12 20:05:44.000000000 +0000 +++ libclamunrar-1.0.3/debian/changelog 2023-09-07 16:41:30.000000000 +0000 @@ -1,3 +1,14 @@ +libclamunrar (1.0.3-1~deb12u1) bookworm; urgency=medium + + [ Scott Kitterman ] + * Remove Stephen Gran from uploaders, add myself (Closes: #964554) + - Thanks for all your contributions over the years + + [ Sebastian Andrzej Siewior ] + * Import 1.0.3 + + -- Sebastian Andrzej Siewior Thu, 07 Sep 2023 18:41:30 +0200 + libclamunrar (1.0.0-2) unstable; urgency=medium * Upload to unstable diff -Nru libclamunrar-1.0.0/debian/control libclamunrar-1.0.3/debian/control --- libclamunrar-1.0.0/debian/control 2023-01-11 22:19:42.000000000 +0000 +++ libclamunrar-1.0.3/debian/control 2023-09-07 16:41:30.000000000 +0000 @@ -2,7 +2,7 @@ Priority: optional Section: non-free/libs Maintainer: ClamAV Team -Uploaders: Michael Tautschnig , Stephen Gran , Sebastian Andrzej Siewior +Uploaders: Michael Tautschnig , Sebastian Andrzej Siewior , Scott Kitterman Build-Depends: bindgen, cargo, check, diff -Nru libclamunrar-1.0.0/debian/copyright libclamunrar-1.0.3/debian/copyright --- libclamunrar-1.0.0/debian/copyright 2023-01-12 20:05:44.000000000 +0000 +++ libclamunrar-1.0.3/debian/copyright 2023-09-07 16:41:30.000000000 +0000 @@ -14,7 +14,6 @@ libclamav/tomsfastmath/* libclamav_rust/.cargo/vendor/winapi/* libclamav_rust/.cargo/vendor/winapi-i686-pc-windows-gnu/* - libclamav_rust/.cargo/vendor/winapi-util/* libclamav_rust/.cargo/vendor/winapi-x86_64-pc-windows-gnu/* libclamav_rust/.cargo/vendor/libloading/tests/nagisa* docs/html/* diff -Nru libclamunrar-1.0.0/debian/patches/Add-a-version-script-for-libclamunrar-and-.patch libclamunrar-1.0.3/debian/patches/Add-a-version-script-for-libclamunrar-and-.patch --- libclamunrar-1.0.0/debian/patches/Add-a-version-script-for-libclamunrar-and-.patch 2023-01-11 22:19:42.000000000 +0000 +++ libclamunrar-1.0.3/debian/patches/Add-a-version-script-for-libclamunrar-and-.patch 2023-09-07 16:41:30.000000000 +0000 @@ -1,6 +1,6 @@ -From 26b772202d0845fe5ba6fadf66664a39918cb45d Mon Sep 17 00:00:00 2001 +From 2839fee403d45a809aaab0b792ee8c6aae6bbb7f Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior -Date: Tue, 10 Jan 2023 23:25:37 +0100 +Date: Wed, 6 Sep 2023 19:17:24 +0200 Subject: Add a version script for libclamunrar and libclamunrar_iface.
Without a version script all symbols will be exported which are public @@ -15,25 +15,23 @@ Signed-off-by: Sebastian Andrzej Siewior --- - libclamunrar/CMakeLists.txt | 6 +++++- - libclamunrar_iface/CMakeLists.txt | 6 +++++- - 2 files changed, 10 insertions(+), 2 deletions(-) + libclamunrar/CMakeLists.txt | 4 ++++ + libclamunrar_iface/CMakeLists.txt | 4 ++++ + 2 files changed, 8 insertions(+) diff --git a/libclamunrar/CMakeLists.txt b/libclamunrar/CMakeLists.txt -index 6b924b466a28d..ab6c42f965c0f 100644 +index e54348f..ef10113 100644 --- a/libclamunrar/CMakeLists.txt +++ b/libclamunrar/CMakeLists.txt -@@ -70,7 +70,8 @@ if(ENABLE_SHARED_LIB) - add_library( clamunrar SHARED ) - set_target_properties(clamunrar PROPERTIES +@@ -71,6 +71,7 @@ if(ENABLE_SHARED_LIB) + set_target_properties( clamunrar PROPERTIES VERSION ${LIBCLAMAV_VERSION} -- SOVERSION ${LIBCLAMAV_SOVERSION}) -+ SOVERSION ${LIBCLAMAV_SOVERSION} -+ LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/libclamunrar.map) - target_sources( clamunrar - PRIVATE - ${UNRAR_SOURCES} -@@ -95,6 +96,9 @@ if(ENABLE_SHARED_LIB) + SOVERSION ${LIBCLAMAV_SOVERSION} ++ LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/libclamunrar.map + CXX_STANDARD 11 + ) + +@@ -98,6 +99,9 @@ if(ENABLE_SHARED_LIB) if(WIN32) set_target_properties(clamunrar PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON) @@ -44,20 +42,18 @@ if(WIN32) diff --git a/libclamunrar_iface/CMakeLists.txt b/libclamunrar_iface/CMakeLists.txt -index 4918eb8a1000e..b406f9810357a 100644 +index d7c4648..bf932d7 100644 --- a/libclamunrar_iface/CMakeLists.txt +++ b/libclamunrar_iface/CMakeLists.txt -@@ -36,7 +36,8 @@ if(ENABLE_UNRAR) - add_library( clamunrar_iface SHARED ) - set_target_properties(clamunrar_iface PROPERTIES +@@ -37,6 +37,7 @@ if(ENABLE_UNRAR) + set_target_properties( clamunrar_iface PROPERTIES VERSION ${LIBCLAMAV_VERSION} -- SOVERSION ${LIBCLAMAV_SOVERSION}) -+ SOVERSION ${LIBCLAMAV_SOVERSION} -+ LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/libclamunrar_iface.map) + SOVERSION ${LIBCLAMAV_SOVERSION} ++ LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/libclamunrar_iface.map + CXX_STANDARD 11 + ) - target_sources( clamunrar_iface - PRIVATE -@@ -62,6 +63,9 @@ if(ENABLE_UNRAR) +@@ -64,6 +65,9 @@ if(ENABLE_UNRAR) if(WIN32) set_target_properties(clamunrar_iface PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON) diff -Nru libclamunrar-1.0.0/debian/patches/Add-an-option-to-avoid-setting-RPATH-on-unix-systems.patch libclamunrar-1.0.3/debian/patches/Add-an-option-to-avoid-setting-RPATH-on-unix-systems.patch --- libclamunrar-1.0.0/debian/patches/Add-an-option-to-avoid-setting-RPATH-on-unix-systems.patch 2023-01-11 22:19:42.000000000 +0000 +++ libclamunrar-1.0.3/debian/patches/Add-an-option-to-avoid-setting-RPATH-on-unix-systems.patch 2023-09-07 16:41:30.000000000 +0000 @@ -1,4 +1,4 @@ -From 06141f1ff58ba6f09ea7a982a7f1932166c04d19 Mon Sep 17 00:00:00 2001 +From f6d559b6c94c5137a5e8e4c11710111964b2420a Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 6 Jan 2023 23:11:00 +0100 Subject: Add an option to avoid setting RPATH on unix systems. 
@@ -16,7 +16,7 @@ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt -index 991580622a32d..77bdbed6e6536 100644 +index a15ea12..cfeeb49 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -174,13 +174,13 @@ endif() @@ -36,7 +36,7 @@ set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_FULL_LIBDIR}") else() diff --git a/CMakeOptions.cmake b/CMakeOptions.cmake -index d995bac5d0d35..92753521af07b 100644 +index d995bac..9275352 100644 --- a/CMakeOptions.cmake +++ b/CMakeOptions.cmake @@ -120,3 +120,6 @@ option(ENABLE_SYSTEMD diff -Nru libclamunrar-1.0.0/debian/patches/Use-either-system-s-tomfastmath-library-or-the-built.patch libclamunrar-1.0.3/debian/patches/Use-either-system-s-tomfastmath-library-or-the-built.patch --- libclamunrar-1.0.0/debian/patches/Use-either-system-s-tomfastmath-library-or-the-built.patch 2023-01-11 22:19:42.000000000 +0000 +++ libclamunrar-1.0.3/debian/patches/Use-either-system-s-tomfastmath-library-or-the-built.patch 2023-09-07 16:41:30.000000000 +0000 @@ -1,4 +1,4 @@ -From 974ba3d9a3f457e8eba2d91b4ebc128ce8f0e8b3 Mon Sep 17 00:00:00 2001 +From d9cfb3999a15b9992e9aad4663621f057f919edf Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 30 Dec 2022 19:06:28 +0100 Subject: Use either system's tomfastmath library or the built-in one. @@ -15,7 +15,7 @@ create mode 100644 cmake/FindTOMSFASTMATH.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt -index 77bdbed6e6536..3e43ed677c6d9 100644 +index cfeeb49..d8119a6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -460,6 +460,11 @@ if(ZLIB_FOUND) @@ -48,7 +48,7 @@ if(WIN32) message("\ diff --git a/CMakeOptions.cmake b/CMakeOptions.cmake -index 92753521af07b..aac5854f67699 100644 +index 9275352..aac5854 100644 --- a/CMakeOptions.cmake +++ b/CMakeOptions.cmake @@ -116,6 +116,9 @@ option(ENABLE_SYSTEMD @@ -63,7 +63,7 @@ option(RUST_COMPILER_TARGET diff --git a/cmake/FindTOMSFASTMATH.cmake b/cmake/FindTOMSFASTMATH.cmake new file mode 100644 -index 0000000000000..abe1b10eb686c +index 0000000..abe1b10 --- /dev/null +++ b/cmake/FindTOMSFASTMATH.cmake @@ -0,0 +1,85 @@ @@ -153,7 +153,7 @@ +) +endif() diff --git a/libclamav/CMakeLists.txt b/libclamav/CMakeLists.txt -index 6bc426f040e62..0d35e3a4a0d94 100644 +index 6bc426f..0d35e3a 100644 --- a/libclamav/CMakeLists.txt +++ b/libclamav/CMakeLists.txt @@ -3,6 +3,7 @@ @@ -199,7 +199,7 @@ ${LIBMSPACK} ClamAV::libclamav_rust diff --git a/unit_tests/CMakeLists.txt b/unit_tests/CMakeLists.txt -index 567e95e7fac83..0122929f58643 100644 +index 567e95e..0122929 100644 --- a/unit_tests/CMakeLists.txt +++ b/unit_tests/CMakeLists.txt @@ -49,7 +49,7 @@ if(ENABLE_APP) diff -Nru libclamunrar-1.0.0/debian/patches/cargo-Remove-windows-referenfes.patch libclamunrar-1.0.3/debian/patches/cargo-Remove-windows-referenfes.patch --- libclamunrar-1.0.0/debian/patches/cargo-Remove-windows-referenfes.patch 2023-01-11 22:19:42.000000000 +0000 +++ libclamunrar-1.0.3/debian/patches/cargo-Remove-windows-referenfes.patch 2023-09-07 16:41:30.000000000 +0000 @@ -1,4 +1,4 @@ -From 110a241f787b8f858faee0433df63ee3a97656e9 Mon Sep 17 00:00:00 2001 +From 33235611ffb90774de0dfbe3e8a43c8e962e0a8e Mon Sep 17 00:00:00 2001 From: Scott Kitterman Date: Sat, 31 Dec 2022 12:12:58 +0100 Subject: cargo: Remove windows referenfes. @@ -9,6 +9,7 @@ complain about it. 
Patch-Name: cargo-Remove-windows-referenfes.patch +Signed-off-by: Sebastian Andrzej Siewior --- .../.cargo/vendor/ansi_term/.cargo-checksum.json | 2 +- libclamav_rust/.cargo/vendor/ansi_term/Cargo.toml | 6 +++--- @@ -20,12 +21,10 @@ .../.cargo/vendor/remove_dir_all/Cargo.toml | 4 ++-- .../.cargo/vendor/tempfile/.cargo-checksum.json | 2 +- libclamav_rust/.cargo/vendor/tempfile/Cargo.toml | 6 +++--- - .../.cargo/vendor/termcolor/.cargo-checksum.json | 2 +- - libclamav_rust/.cargo/vendor/termcolor/Cargo.toml | 4 ++-- - 12 files changed, 25 insertions(+), 25 deletions(-) + 10 files changed, 22 insertions(+), 22 deletions(-) diff --git a/libclamav_rust/.cargo/vendor/ansi_term/.cargo-checksum.json b/libclamav_rust/.cargo/vendor/ansi_term/.cargo-checksum.json -index ee41459dbeec2..2c87a6df8caa5 100644 +index ee41459..2c87a6d 100644 --- a/libclamav_rust/.cargo/vendor/ansi_term/.cargo-checksum.json +++ b/libclamav_rust/.cargo/vendor/ansi_term/.cargo-checksum.json @@ -1 +1 @@ @@ -33,7 +32,7 @@ \ No newline at end of file +{"files":{"Cargo.lock":"31bb7b361278d99a00595cbd916c444e6fd193b5f0b1ea0cf2d9454440739501","LICENCE":"2762990c7fbba9d550802a2593c1d857dcd52596bb0f9f192a97e9a7ac5f4f9e","README.md":"8d983e1bb3cc99724010d9073a5be6452cd49bd57a877525fd0a5dd41e6591d5","examples/256_colours.rs":"5f2845068bc2d93cff4a61f18ffa44fbbbc91be771dfd686d537d343f37041da","examples/basic_colours.rs":"d610795f3743d10d90ec4e5ab32cc09fb16640896cecd2f93fca434a0920397c","examples/rgb_colours.rs":"8399e5131e959a56c932036b790e601fb4ad658856112daf87f933889b443f2c","src/ansi.rs":"988fb87936064fa006fcc9474ac62099c8d6e98d38bb80cec2cd864066482a08","src/debug.rs":"61343f8bf13695020102c033aeaacd9ccd3ec830eacbf9011127e61829451d20","src/difference.rs":"9b4b8f91c72932bfda262abdceff0ec124a5a8dd27d07bd4d2e5e7889135c6c9","src/display.rs":"c04f2397d1d1d86a5e2188c2840c505cb0baeaf9706a88d4bbe56eadc67811b9","src/lib.rs":"b85df4b9b8832cda777db049efa2ec84b9847438fa3feaf8540e597ce2532a47","src/style.rs":"1042fc973f5ea8bbb2a2faec334aad530520b53edc9b3296174ae38c1060490b","src/util.rs":"07c127f732887573a1c9126fc0288e13e7a8f1f803513b95e50aac2905171b0d","src/windows.rs":"7ce7dd6738b9728fcd3908c284b6f29a9bdfb34af761b4c7385cf7e3e1b20e64","src/write.rs":"c9ec03764ad1ecea8b680243c9cafc5e70919fcea7500cc18246ffd8f6bb4b33"},"package":"d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2"} diff --git a/libclamav_rust/.cargo/vendor/ansi_term/Cargo.toml b/libclamav_rust/.cargo/vendor/ansi_term/Cargo.toml -index 0e5febabd2fee..3314b47c2a38d 100644 +index 0e5feba..3314b47 100644 --- a/libclamav_rust/.cargo/vendor/ansi_term/Cargo.toml +++ b/libclamav_rust/.cargo/vendor/ansi_term/Cargo.toml @@ -38,6 +38,6 @@ version = "1.0.39" @@ -47,7 +46,7 @@ +#version = "0.3.4" +#features = ["consoleapi", "errhandlingapi", "fileapi", "handleapi", "processenv"] diff --git a/libclamav_rust/.cargo/vendor/atty/.cargo-checksum.json b/libclamav_rust/.cargo/vendor/atty/.cargo-checksum.json -index a68b85b16d395..8a392c512aa57 100644 +index a68b85b..8a392c5 100644 --- a/libclamav_rust/.cargo/vendor/atty/.cargo-checksum.json +++ b/libclamav_rust/.cargo/vendor/atty/.cargo-checksum.json @@ -1 +1 @@ @@ -55,7 +54,7 @@ \ No newline at end of file 
+{"files":{"CHANGELOG.md":"70db121262d72acc472ad1a90b78c42de570820e65b566c6b9339b62e636d572","Cargo.lock":"6868f02a96413bcba37a06f01c6bf87e6331dea9461681a47a561cec6acd2546","LICENSE":"99fa95ba4e4cdaf71c27d73260ea069fc4515b3d02fde3020c5b562280006cbc","README.md":"e559a69c0b2bd20bffcede64fd548df6c671b0d1504613c5e3e5d884d759caea","examples/atty.rs":"1551387a71474d9ac1b5153231f884e9e05213badcfaa3494ad2cb7ea958374a","rustfmt.toml":"8e6ea1bcb79c505490034020c98e9b472f4ac4113f245bae90f5e1217b1ec65a","src/lib.rs":"d5abf6a54e8c496c486572bdc91eef10480f6ad126c4287f039df5feff7a9bbb"},"package":"d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"} diff --git a/libclamav_rust/.cargo/vendor/atty/Cargo.toml b/libclamav_rust/.cargo/vendor/atty/Cargo.toml -index d6bf2d03b36ee..f846a68598546 100644 +index d6bf2d0..f846a68 100644 --- a/libclamav_rust/.cargo/vendor/atty/Cargo.toml +++ b/libclamav_rust/.cargo/vendor/atty/Cargo.toml @@ -27,8 +27,8 @@ version = "0.1.6" @@ -71,7 +70,7 @@ [badges.travis-ci] repository = "softprops/atty" diff --git a/libclamav_rust/.cargo/vendor/libloading/.cargo-checksum.json b/libclamav_rust/.cargo/vendor/libloading/.cargo-checksum.json -index 04164017dd68e..884f3c8554a0c 100644 +index 0416401..884f3c8 100644 --- a/libclamav_rust/.cargo/vendor/libloading/.cargo-checksum.json +++ b/libclamav_rust/.cargo/vendor/libloading/.cargo-checksum.json @@ -1 +1 @@ @@ -79,7 +78,7 @@ \ No newline at end of file +{"files":{},"package":"b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f"} diff --git a/libclamav_rust/.cargo/vendor/libloading/Cargo.toml b/libclamav_rust/.cargo/vendor/libloading/Cargo.toml -index 65168d58585e1..a43837fbf4a31 100644 +index 65168d5..a43837f 100644 --- a/libclamav_rust/.cargo/vendor/libloading/Cargo.toml +++ b/libclamav_rust/.cargo/vendor/libloading/Cargo.toml @@ -43,9 +43,9 @@ version = "1.1" @@ -99,7 +98,7 @@ +# "libloaderapi", +#] diff --git a/libclamav_rust/.cargo/vendor/remove_dir_all/.cargo-checksum.json b/libclamav_rust/.cargo/vendor/remove_dir_all/.cargo-checksum.json -index bc449a299f9aa..b5d9233861e82 100644 +index bc449a2..b5d9233 100644 --- a/libclamav_rust/.cargo/vendor/remove_dir_all/.cargo-checksum.json +++ b/libclamav_rust/.cargo/vendor/remove_dir_all/.cargo-checksum.json @@ -1 +1 @@ @@ -107,7 +106,7 @@ \ No newline at end of file +{"files":{"LICENCE-APACHE":"c6c8c9dbe29fb4d68d829c7a402f9f6baae3472ecf107cc2a57c75a9a8d1b85c","LICENCE-MIT":"db264505cb1856383e255c8373da9e5aeadc1cd92b570fcc94fd1fb7d892db78","README.md":"167f3796d716e1bb4a6b98d706fd3c02012dff55d488a24e7de822d896d3cc5a","src/fs.rs":"a7137d7f3a5769cd547daf2be2096a7a664d6114107a3f143c921c4aaab97719","src/lib.rs":"8155ac516b4d054de00d78ce70501175bea7248c0436e4a7f0d35823299f7dc2"},"package":"3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7"} diff --git a/libclamav_rust/.cargo/vendor/remove_dir_all/Cargo.toml b/libclamav_rust/.cargo/vendor/remove_dir_all/Cargo.toml -index a847288925eba..f9cdf3057da24 100644 +index a847288..f9cdf30 100644 --- a/libclamav_rust/.cargo/vendor/remove_dir_all/Cargo.toml +++ b/libclamav_rust/.cargo/vendor/remove_dir_all/Cargo.toml @@ -23,6 +23,6 @@ license = "MIT/Apache-2.0" @@ -120,7 +119,7 @@ +#version = "0.3" features = ["std", "errhandlingapi", "winerror", "fileapi", "winbase"] diff --git a/libclamav_rust/.cargo/vendor/tempfile/.cargo-checksum.json b/libclamav_rust/.cargo/vendor/tempfile/.cargo-checksum.json -index 26f8560bcde58..9102e3d3fef60 100644 +index 26f8560..9102e3d 100644 --- 
a/libclamav_rust/.cargo/vendor/tempfile/.cargo-checksum.json +++ b/libclamav_rust/.cargo/vendor/tempfile/.cargo-checksum.json @@ -1 +1 @@ @@ -128,7 +127,7 @@ \ No newline at end of file +{"files":{"LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"8b427f5bc501764575e52ba4f9d95673cf8f6d80a86d0d06599852e1a9a20a36","NEWS":"4255c86ac140a4d08423cd05cbd0aa42ff796bb4b38579dd19cde289ee3baecd","README.md":"db6717cbd0b3cbbce5f3cdb8a80d8f2d90b1be251b4c1c647557ae0f78ec9748","src/dir.rs":"4499ff439b740f8d2f01458664e2bf72bbfdd1206226780c6a91fb309ef15707","src/error.rs":"cc7d8eace0fff11cb342158d2885d5637bfb14b24ef30755e808554772039c5f","src/file/imp/mod.rs":"f6da9fcd93f11889670a251fdd8231b5f4614e5a971b7b183f52b44af68568d5","src/file/imp/other.rs":"99c8f9f3251199fc31e7b88810134712e5725fb6fa14648696ed5cbea980fc5b","src/file/imp/unix.rs":"cf8eeceecfddc37c9eaf95a1ebe088314dc468f07fe357961d80817eef619ca4","src/file/imp/windows.rs":"03d81d71c404f0d448e1162825d6fbd57a78b4af8d4dc5287ec2e7c5a873d7cc","src/file/mod.rs":"bda4ee3998106089a4c0ccbc8e46dc22b7d3aec427487fd4e414fb132b378736","src/lib.rs":"e2b0df7e17cc6680a5bb0829d0433f069c6bf9eede2007d21e3b01a595df41a8","src/spooled.rs":"51fa1d7639027234e257d343a5d3c95f2e47899ba6a24f0abec8d4d729eba6d6","src/util.rs":"2bd80ee69009e7e36b596d0105bb00184cff04e899e9fcce2e4cc21f23dda073","tests/namedtempfile.rs":"0031cb33ae6faf45be103869b4d98af63bef4040dc489b323212eb7a7ef72a9a","tests/spooled.rs":"29e797d486d867cb6ac46d4cf126eb5868a069a4070c3f50ffa02fbb0b887934","tests/tempdir.rs":"771d555d4eaa410207d212eb3744e016e0b5a22f1f1b7199636a4fac5daaf952","tests/tempfile.rs":"92078a1e20a39af77c1daa9a422345d20c41584dd2010b4829911c8741d1c628"},"package":"5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4"} diff --git a/libclamav_rust/.cargo/vendor/tempfile/Cargo.toml b/libclamav_rust/.cargo/vendor/tempfile/Cargo.toml -index 253f8667b8392..51daf19439a3b 100644 +index 253f866..51daf19 100644 --- a/libclamav_rust/.cargo/vendor/tempfile/Cargo.toml +++ b/libclamav_rust/.cargo/vendor/tempfile/Cargo.toml @@ -38,6 +38,6 @@ nightly = [] @@ -141,23 +140,3 @@ +#[target."cfg(windows)".dependencies.winapi] +#version = "0.3" +#features = ["fileapi", "handleapi", "winbase"] -diff --git a/libclamav_rust/.cargo/vendor/termcolor/.cargo-checksum.json b/libclamav_rust/.cargo/vendor/termcolor/.cargo-checksum.json -index 9994c6f1e7aa8..df99247723aaf 100644 ---- a/libclamav_rust/.cargo/vendor/termcolor/.cargo-checksum.json -+++ b/libclamav_rust/.cargo/vendor/termcolor/.cargo-checksum.json -@@ -1 +1 @@ --{"files":{"COPYING":"01c266bced4a434da0051174d6bee16a4c82cf634e2679b6155d40d75012390f","Cargo.toml":"c9cf1882497f275a2607a7a3bbee959c90cc9664832f2a4e2a26005e21795834","LICENSE-MIT":"0f96a83840e146e43c0ec96a22ec1f392e0680e6c1226e6f3ba87e0740af850f","README.md":"cc4c882bde8d2ef26ef4770ff30d60eda603d87ae32e16d99525dc88f3377238","UNLICENSE":"7e12e5df4bae12cb21581ba157ced20e1986a0508dd10d0e8a4ab9a4cf94e85c","rustfmt.toml":"1ca600239a27401c4a43f363cf3f38183a212affc1f31bff3ae93234bbaec228","src/lib.rs":"91e857a8937bebc50b38a365a3f5138c5898648a14e4b73b84d067c9251cff02"},"package":"bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755"} -\ No newline at end of file 
-+{"files":{"COPYING":"01c266bced4a434da0051174d6bee16a4c82cf634e2679b6155d40d75012390f","LICENSE-MIT":"0f96a83840e146e43c0ec96a22ec1f392e0680e6c1226e6f3ba87e0740af850f","README.md":"cc4c882bde8d2ef26ef4770ff30d60eda603d87ae32e16d99525dc88f3377238","UNLICENSE":"7e12e5df4bae12cb21581ba157ced20e1986a0508dd10d0e8a4ab9a4cf94e85c","rustfmt.toml":"1ca600239a27401c4a43f363cf3f38183a212affc1f31bff3ae93234bbaec228","src/lib.rs":"91e857a8937bebc50b38a365a3f5138c5898648a14e4b73b84d067c9251cff02"},"package":"bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755"} -diff --git a/libclamav_rust/.cargo/vendor/termcolor/Cargo.toml b/libclamav_rust/.cargo/vendor/termcolor/Cargo.toml -index 9a996a591bb73..3b9e25ef70dce 100644 ---- a/libclamav_rust/.cargo/vendor/termcolor/Cargo.toml -+++ b/libclamav_rust/.cargo/vendor/termcolor/Cargo.toml -@@ -39,5 +39,5 @@ repository = "https://github.com/BurntSushi/termcolor" - name = "termcolor" - bench = false - --[target."cfg(windows)".dependencies.winapi-util] --version = "0.1.3" -+#[target."cfg(windows)".dependencies.winapi-util] -+#version = "0.1.3" diff -Nru libclamunrar-1.0.0/debian/patches/series libclamunrar-1.0.3/debian/patches/series --- libclamunrar-1.0.0/debian/patches/series 2023-01-11 22:19:42.000000000 +0000 +++ libclamunrar-1.0.3/debian/patches/series 2023-09-07 16:41:30.000000000 +0000 @@ -1,4 +1,4 @@ Add-an-option-to-avoid-setting-RPATH-on-unix-systems.patch -cargo-Remove-windows-referenfes.patch Use-either-system-s-tomfastmath-library-or-the-built.patch Add-a-version-script-for-libclamunrar-and-.patch +cargo-Remove-windows-referenfes.patch diff -Nru libclamunrar-1.0.0/debian/upstream/signing-key.asc libclamunrar-1.0.3/debian/upstream/signing-key.asc --- libclamunrar-1.0.0/debian/upstream/signing-key.asc 2023-01-11 22:19:42.000000000 +0000 +++ libclamunrar-1.0.3/debian/upstream/signing-key.asc 2023-09-07 16:41:30.000000000 +0000 @@ -1,51 +1,63 @@ -----BEGIN PGP PUBLIC KEY BLOCK----- -mQINBGBjkiwBEADgJTEabt5zCareK9pJJswGU62smrq3uOaaDhtgztj3bxRY/UGT -jypxMee1S/fGWQZQy52lFOXLud5gFC5QU8Yk+7EAsh2ZJSKtWUw8/iMxZ4vsrKVV -QQRLTqMUY16R6/8UzdIT/hD6CbgWgiXF4NH5AGleNqjkF4TXrGof0AK0veekZYJV -WWStqJR/cIiG0nxDQ87RWfeZgrULZmA8uii22po7rGGzxT0byb83dKK+7IoJ/6B/ -ZlI0PmzuJ9/Xp6Mmm//sdPEqRwedt2aGrvtdF79xYJ1tDhOVMpID0aPdURBwlliq -fyKGaIUEa1ke+Dy7sQF8i3zY7ce6PZOtbsts9xsJLvF98VhRsFy0vProPv1mVbiU -PoxxPTnyLeGUm27amIMl4NfX4a8Hdu+ExzKprqWo3Ir08HQzNt6QoFghDIpi9nm4 -k327CJzJv/g2dq5kY/KU6wFHbdH3zP7u+p9DDqKJYFebPCvwM1hMxPdLqemTsfob -kJ4iXcAXjpMqwXX9m0lyQcRHdIdc99yyCUMdPNfapLgY7rOahsS16795/5KSrCuF -h2RcoAWUjh6sGjgGIY4Hy1qQwp3t6X/L6TOhDkBDWId5bTKFR9NqrVprOVsUutbs -0TOqLyH4GXCpE9vzg8DX7FTdRiCTpbyQ7VuSxRN/vAyVRP4chrABNfvh/QARAQAB +mQINBGQPO58BEACsF0vtWepeSZRklvCG170RKuZL+9aH8U3zVVtQgDlmcboVRiFf ++fgraQCRVh8cbRM76mqqGoMT0BlwZ1OfrzpZcrNUg5uAgok51P7SoCy3zummnv4M +TadwDLEHNf/38HSnrJe196IiwMEtyuKMGDfzyjQnr357Owem+7FgT2/sU7XwWD2B ++tn/yhbw+HpJuUjdmxmEqJr/4okRSj9OSWV+EFhS9owMNK8zntwHkJzmv4ctS1Ak +Zryh/J3jEnPqzSJDsH729XzKpG4BxCxnybP5WuMsJuNvSlVhVko1PaSi84Dy003w +WoQIgtQHNm6i8CcetrpNCULELTU8sViwdBQXIlGjCa3N+dq1ZOErasp4QzlCVOus +iOkm1KltvbJWPfVDW0A0Z4mP19YRlQTc0jn4w9R5ROmwcLf6Co8nBD2AV8MFjVJA +E21Mfj6LfksplmYg/DEa4kCe8KYPSATq6LFSf+o96fkmnsZovOi6zZ6RtV9l4Aya +pkcvk9iO2cvJMDYJ6iA2dC8EHC2m1tt1Rs2abJqOmsUJATo7MUpK7MD7NyhVvkjJ +j5QRES25uV4OY9ck091GB+XXAf3gGf3Pi2jop1gauGoxyBqLT4SkwqsnsrFF8eEh +A8UdBmo4K6MWFaxw6JsBPpIM63Qe848RzlQRanxS2n50ZZwMLIJrI2MEFQARAQAB tDtUYWxvcyAoVGFsb3MsIENpc2NvIFN5c3RlbXMgSW5jLikgPHJlc2VhcmNoQHNv -dXJjZWZpcmUuY29tPokCPgQTAQIAKAUCYGOSLAIbAwUJA8JnAAYLCQgHAwIGFQgC 
-CQoLBBYCAwECHgECF4AACgkQYJsCTys+3QfbLg//eZ0yCLr957FtztVlLIHYLpJn -LIl8m+hu3KeUTIwvMoCLiw48cWqFZaJS9PTmrraSj5SKMDnAYFl4O0fhHfQiWDjb -sZ32hQni1PcqxoXqSnkXD7mXjcPH2WuNnQM5WZoAD2VmksqRT57I/K2omW/sjaVe -Nbq3GSOy8WThibswxzioDHtTPFa0/Ah2qq8OkcVJuTwCS1xkLijJc3jx/pOBHWFA -BA4VX5pwcSou/woJ+ySsgBGEo5hOsd0r7h3a0O8EiuGulHTqQt87rVWGv0JKhnub -FULr/ld8+d1zGvJL3OzFG6udjWjw3QqsLDZa94G1ksZWgqr/RgexlSYuxPW+lKUC -QkgotLaEKQC4cpBLRcJEjWyrf4IjoJvkFrUtPsVH9VStICUQATyXARNVWbnJHq3Y -qynCXSB4NZvdo9BF6Tx3FA+ZUjK4/X/UsjL/Hmv99huBctQsWL7gQCoSw9YOt4qs -/As6fgPaNpYb9woJqNMEQNmrhfnnX9PGaM5dM769/E5vF67mkhBNqVJ0+4gyrpTU -T7Pmavrc3T4aSSde8eG6zSlmW8wM5xELfK5TeTexBKGAaDV8c2BkfenRO8OvBSvr -Gz+Xp/YzO9uGUPnbMsTVtxClmzmEj/MVpvtRdEo+dbVOSy8nk3XCu7jMjpojggPv -YQ+4CZYxYpW1T2hSFxG5Ag0EYGOSLAEQAM5kdheiwStznKiaIWaO+0PBA8bAv2xG -7qW/Di85xdcH9miHZM9+lx/iZoOBC9wZC9eatV4Hcukff700a/LGZSYVDvHvdEWb -Tv6ZwvHzbxuc1Kv8cLYopRUfOAwMYOmXriMLxVmd3fcfPNsfPRqfkaZRdkm7qTbP -DeKpSL157HbUG64Eej3cOViq49Hy9L6jtfjtZVxX7OavjnEpyezG6qSIAkvD6O7J -Yg3yfkr4sa44qohq9lDfjWpoXMebu0WsIyW11hm+7KMrBMHjlNgXppu0+ryeKfQi -FjPDBd9aflnHy2e8aHef9S5349thNGzjV3TNMV6A6oAN2XQ7pgj5DTwMZtHFCjdE -HIyfGCAgQQL0/MaFzKwuw/l/m31smZgItAZXYY1xoC2gh7LTPZ/3t2VVVof4TNXD -c+pUNgY6bwPBksuhsX8qsldDr5q3jdHZsjlycpL38Z4EZNg3BqxJlVseB395ZOQ6 -FCtHGh6rpsYQZDj1QWcUyev8NHSbSNRMS2/Nn5bT3KgEWEWrmOxp3iMmunBqmnt2 -/xJ83PKRTbSKgcG+Y/+DtnleHpRueRUPC/5XX0DNznSjF10vAh4XtBKGBNaHU9Vv -nMXlYeJ9kCMdSs7cM4FfLFMtPkFwpDYhvQRAEwt11RV6bGo5ZCgGrHGIBlNk6ZSO -1hP15hUtkWU7ABEBAAGJAiUEGAECAA8FAmBjkiwCGwwFCQPCZwAACgkQYJsCTys+ -3QfI7Q//Sb2yotfcsG5Q2FkHRBE85su01c6pewImV9bofNhATSQ37yVHUDrchm+k -Y6Pq5Tdgg+eAMcYz2yv9JhFxJyzgI0viQrkjD7oXeRTGZ0CvzxHhTakAOADXAnYt -wmJglEBTCCbUZ968kQkdBxEaUjVWPCMyIceRr8kUfiCjX51+DLESy8b5lOBhprO6 -vDukk/rmDruIpJPhJ3f89gsp2Ry7gk7a5ENIuVEElLK6OPBZhC3dDZwsvm5CYb62 -+U/b1xtmElpgGbNJCjxvAZiJ0WN2zfBXan+SJ4I9NFUw9jvSURvDV24s4YPhkbZu -OIqQEEYF8QMZ1VJlsr7BoWIXrdKDNJbmEVyx3UiYXKD1BVXCQADPu8G8EPuo4yAf -WymJAOJbAqNF2Op6+sC7/v8Xcgc3PGGyu23cZwikfCAgV+beywTPI5+eVV5F/rpx -XOlvNxT0NOg3UOeQ9GvCbD5ZcuDzmhqso0eMABeq5K5XB12xlWNaTZsIt8Dim4uK -aKMGeB+6iygkHITbay0sMUo0dX6nT27bjX5dTBo/vnVAPYuCS6rh8ojalR1fYFKA -1zdeSaJ2EW5KmgC9yedylSbHdQ+LjSY3t/Ut4RYaekIDeGmVoQkJkL7gIAs8NOYw -G3ayr0AtmeMagAMy94NH5ufVgFk+QPmXpzS7rMLQ3Is1ZOuWNrQ= -=gazS +dXJjZWZpcmUuY29tPokCPgQTAQIAKAUCZA87nwIbAwUJA8JnAAYLCQgHAwIGFQgC +CQoLBBYCAwECHgECF4AACgkQzODf0h7Bqb8gjw/9FYbuwzBjuVCVhHfoY4rfCDoj +eh3NVaTdHIWO1yp6JSM/ny+Z3wDzZLtyQlBcnaJlerncS961iOEG2gBA3v8fZudN +JFpvRC1gxd9IEhGXIDDg+BeOAJUbY9LQTc/dnzWBB04nun20+lM/Rad2BlkQ+YSz +uRUaFsgk0lQPCSDQfoahtoap14jWFsa19aOjTXhAF1MGEDXuoCXM6ByH6wJjtz+z +QJrXvmHS4v8yh8z/pibLGV7IgNrtoW2ej4jFadzEEn/MDajI+5N3C2w5rD41L7Lm +j1uCIBe1G54fgJSstvBxZcnAj9qTF2FBBUpQ1q/ONFfUjpAGQKG2qh1UNBiOZNS3 +gDVN2T8h083WRN2gQvNJnJwXaF4Nm6zhmX4sUqE9nexUrDF8VG8xXJwPgZijaHPV +nZdgDZvQ47BKiJOUj80O9/qYyWo89pX6Rr/YmfbURhRe/kiPon9kIVFCzDDFPniJ +svICjpdkz7wZ0kUN+L7BtDQJfjFjTJPNA2nOV6l64DcdCiyutOFSz4Zf85GoT9wK +Mqv1UmpLwsq2FnF+Gpk1GLZCLprSCu3n16pr+gdRshnE93cvJbMGlP0+jcuFF5hr +Lsvujl7O81JrIjmGXrulHHpdrZQ4J2A3UpDDc60DOHG9ubnBnN7k2kQPY+9a1rzf +WPkMQKaxVo3uH1XRO/GJAhwEEAECAAYFAmQPQKgACgkQYJsCTys+3QcvuA//cuJX +LDfsGn9pWTCU83cF6eiQ5Id5FPKldyhSqYRgavgRov0fwD6ZU79dpURf+YsWxxtI +pIntn9hUgSgmdyUw+0GcAmFq6gJOQxWY2nij6X0A9Pskr2qW+WhMGKKVbYez65qw +fgGdlDFT/4nzVBGpIlRGGuOC0aT3jDhBXbp8Eusxi+5He7Kx2Chem7kCX9xBpUYS +FrujMlaMs8O1bsBW3xTWLpHhX6O6bpEY8zDfWavSAqCmzw5RtytAJWsAG1clU9AK +FwSKC+10ODo5VFzmRSgF727Gtuow1WnPhFM/7Cn+M+knCTm2vRz6Vz29/a6DUrZl +CbyKGPR8a9C3UG4VT8C3+fi1boZ+/trUw27YtrKp70FDy3UdgLDF2eO9B77vs35n 
++hf2EipG407CGBqb8q6boOdxC0BN/Fcy30Oms4DSUTqEiqvSA/35BhyGfOmJb5tt +kMEHLPveJvilICKBMQdYHemR3mk+muzAO7+y4VOKl+rP0xXCp6y6PAiEu14lzxzI +isQu6omEJBOUiad2iZz+4OUU1Dil0YgUpNgJQyKaDUOR0MSzFU9IM5pzZJ14XkdG +6iriPEX1V9SlfZlaJDNlN11vFlVFeu02vJTcddAaHYad2tKD09GAEuZkib0ToWxz +S+4cBxojti6vMUHVSIlbov7ZMHd/WMqQUb1tSl65Ag0EZA87nwEQALkEL5rxEnv7 +rcwcF3KwcppfHTWjkTV0dyMmE/kLf9e3QnMdCaiZMypxmYipOe9Z/9G6YGH+Qujp +N0mzenNgKljs961VTbOUYTusgwTz1qFienX8lg+eYRQIpqPjisb1xGlISojI7vWO +FZT/LrxVI6Y+HLSXkZjPD7TqyefgOlP2YchmFAjC/e+rtKAZ+FLlguotvDRxl/zp +AA8LLFup8Y8+BvQIWiy6jwwAjJMiJdwBtUz1OxpMuGU/C6bWCkAAFKjhC5F9JQEI +9jHh7/cQEGabDmjIGfywj9jniJrP79hrLfuryFvo6qbw7EwirJbKpoHJwS03ei29 +Uwttw2Dn41dZ0MvjfpYwI61cE5NpvKCBJkkEho6SDXGvLABerEu3ASGlYybQOzrg +aHO9AxGXgD2tFjI0NNunVxy/0KQ+kWcdQ1p/dk/O2U6w5CfFHU68aZgAxmj7jngx +YKjs3IAUy8mwkxtyyFiLJ3E19NdB8+t0cjJMtDVtXOgmoi7HaP8RghdaitaI4q/z +ocIAWhJhN7IkzrYWJ/Bkq4j0doKmaDR8GPP3i5Keg1c1z4yGX1c9MWTMy49l5Nwl +/bUjUiIRocCc33dZCqL5KPMBdtLJOUiIG/KZoMqr6Ozxyriv4Nn/CT2/SSvatYtP +SN91kt61c2FmoBBSltiFwncbUVmB3HmDABEBAAGJAiUEGAECAA8FAmQPO58CGwwF +CQPCZwAACgkQzODf0h7Bqb/ueQ/7BofldLW0/GqvTMEDnysUB/tchWzae6LnBeur +EhIB6smOVkMiuzrRLl2/vFVmv6H1UZK2fRPpaI/3V2mg+ML5ioVVgBrg3IQxcDpY +sYiictUFXJQ9y/ygAl8zxbkE4v4BWAwk5kIFWw1q/sb3IUc07GeK16PLY0+ocPdV +vMyiV8w5wKBlkyPwdntjuJEyfU3lsIeR2iBcQe4HL1Y0/pm6Ilpn+uj2ZYlYZzhN +zBuLy9HB3it161KP/RyxWNB1AEAAx8Mh0IhHOEWLvbfjHJxkJ2GX0TgL5wa45l2a +3clP4Dw2MpLfzIHs+CxG7t6IdSvoX1+0gZPvmo9JXDsLNa7+uu/lcCUjXY9TWdvc +VIZRwlSBQQC8WnGpbkvsBDsJ2BskPWOmv0ol3aiiekJJhVT1K9M1ZwDGX1ts8hLr +mf0kCFDq0RImCg6WZAM6z3Fg/1pPGPRktJ4tmSui3GYzrVA34gTunvlqPYKCFYHA +EdUdqycz7UAroj7k3OndZGnnT2r/qKaIYF53/u+6SXM/lUSrJfwxG9eXiw80P/YW +K9VjT3CbQA74vz7pC1bxpYDas6w39DRpkYR1bn1GIhmJhK2CUj5FQla+opVN2Wmg +sk0O7hoet7RDvKpoUyBHxHOJseDQEzWc38bOxD+x0vz/MirBnLdBx8g836tgqy7h +ab6V2qU= +=X+5e -----END PGP PUBLIC KEY BLOCK----- diff -Nru libclamunrar-1.0.0/libclamav/autoit.c libclamunrar-1.0.3/libclamav/autoit.c --- libclamunrar-1.0.0/libclamav/autoit.c 2022-11-23 18:54:48.000000000 +0000 +++ libclamunrar-1.0.3/libclamav/autoit.c 2023-08-25 21:18:34.000000000 +0000 @@ -761,6 +761,10 @@ cli_dbgmsg("autoit: file is compressed\n"); if (cli_readint32(UNP.inputbuf) != 0x35304145) { cli_dbgmsg("autoit: bad magic or unsupported version\n"); + // Free this inputbuf and set back to NULL. + free(UNP.inputbuf); + UNP.inputbuf = NULL; + continue; } @@ -769,6 +773,10 @@ } if (cli_checklimits("autoit", ctx, UNP.usize, 0, 0) != CL_CLEAN) { + // Free this inputbuf and set back to NULL. 
+ free(UNP.inputbuf); + UNP.inputbuf = NULL; + continue; } @@ -848,12 +856,16 @@ */ cli_dbgmsg("autoit: file is not compressed\n"); UNP.outputbuf = UNP.inputbuf; - UNP.usize = UNP.csize; + UNP.inputbuf = NULL; + + UNP.usize = UNP.csize; } if (UNP.usize < 4) { cli_dbgmsg("autoit: file is too short\n"); free(UNP.outputbuf); + UNP.outputbuf = NULL; + continue; } diff -Nru libclamunrar-1.0.0/libclamav/bytecode_api.h libclamunrar-1.0.3/libclamav/bytecode_api.h --- libclamunrar-1.0.0/libclamav/bytecode_api.h 2022-11-23 18:54:48.000000000 +0000 +++ libclamunrar-1.0.3/libclamav/bytecode_api.h 2023-08-25 21:18:34.000000000 +0000 @@ -163,7 +163,10 @@ FUNC_LEVEL_0105 = 150, /**< LibClamAV release 0.105.0 */ FUNC_LEVEL_0105_1 = 151, /**< LibClamAV release 0.105.1 */ - FUNC_LEVEL_1_0 = 160, /**< LibClamAV release 1.0.0 */ + FUNC_LEVEL_1_0 = 160, /**< LibClamAV release 1.0.0 */ + FUNC_LEVEL_1_0_1 = 161, /**< LibClamAV release 1.0.1 */ + FUNC_LEVEL_1_0_2 = 162, /**< LibClamAV release 1.0.2 */ + FUNC_LEVEL_1_0_3 = 163, /**< LibClamAV release 1.0.3 */ }; /** diff -Nru libclamunrar-1.0.0/libclamav/dmg.c libclamunrar-1.0.3/libclamav/dmg.c --- libclamunrar-1.0.0/libclamav/dmg.c 2022-11-23 18:54:48.000000000 +0000 +++ libclamunrar-1.0.3/libclamav/dmg.c 2023-08-25 21:18:34.000000000 +0000 @@ -207,8 +207,7 @@ /* This is the block where we require libxml2 */ #if HAVE_LIBXML2 -/* XML_PARSE_NOENT | XML_PARSE_NONET | XML_PARSE_COMPACT */ -#define DMG_XML_PARSE_OPTS ((1 << 1 | 1 << 11 | 1 << 16) | CLAMAV_MIN_XMLREADER_FLAGS) +#define DMG_XML_PARSE_OPTS ((XML_PARSE_NONET | XML_PARSE_COMPACT) | CLAMAV_MIN_XMLREADER_FLAGS) reader = xmlReaderForMemory(outdata, (int)hdr.xmlLength, "toc.xml", NULL, DMG_XML_PARSE_OPTS); if (!reader) { diff -Nru libclamunrar-1.0.0/libclamav/hfsplus.c libclamunrar-1.0.3/libclamav/hfsplus.c --- libclamunrar-1.0.0/libclamav/hfsplus.c 2022-11-23 18:54:48.000000000 +0000 +++ libclamunrar-1.0.3/libclamav/hfsplus.c 2023-08-25 21:18:34.000000000 +0000 @@ -54,7 +54,8 @@ hfsPlusForkData *, const char *, char **, char *); static cl_error_t hfsplus_validate_catalog(cli_ctx *, hfsPlusVolumeHeader *, hfsHeaderRecord *); static cl_error_t hfsplus_fetch_node(cli_ctx *, hfsPlusVolumeHeader *, hfsHeaderRecord *, - hfsHeaderRecord *, hfsPlusForkData *, uint32_t, uint8_t *); + hfsHeaderRecord *, hfsPlusForkData *, uint32_t, uint8_t *, + size_t); static cl_error_t hfsplus_walk_catalog(cli_ctx *, hfsPlusVolumeHeader *, hfsHeaderRecord *, hfsHeaderRecord *, hfsHeaderRecord *, const char *); @@ -546,7 +547,7 @@ } /* fetch node into buffer */ - status = hfsplus_fetch_node(ctx, volHeader, attrHeader, NULL, &(volHeader->attributesFile), thisNode, nodeBuf); + status = hfsplus_fetch_node(ctx, volHeader, attrHeader, NULL, &(volHeader->attributesFile), thisNode, nodeBuf, nodeSize); if (status != CL_SUCCESS) { cli_dbgmsg("hfsplus_check_attribute: node fetch failed.\n"); goto done; @@ -656,7 +657,8 @@ /* Fetch a node's contents into the buffer */ static cl_error_t hfsplus_fetch_node(cli_ctx *ctx, hfsPlusVolumeHeader *volHeader, hfsHeaderRecord *catHeader, - hfsHeaderRecord *extHeader, hfsPlusForkData *catFork, uint32_t node, uint8_t *buff) + hfsHeaderRecord *extHeader, hfsPlusForkData *catFork, uint32_t node, uint8_t *buff, + size_t buffSize) { bool foundBlock = false; uint64_t catalogOffset; @@ -741,6 +743,11 @@ readSize = endSize; } + if ((buffOffset + readSize) > buffSize) { + cli_dbgmsg("hfsplus_fetch_node: Not enough space for read\n"); + return CL_EFORMAT; + } + if (fmap_readn(ctx->fmap, buff + buffOffset, fileOffset, 
readSize) != readSize) { cli_dbgmsg("hfsplus_fetch_node: not all bytes read\n"); return CL_EFORMAT; @@ -964,7 +971,7 @@ } /* fetch node into buffer */ - status = hfsplus_fetch_node(ctx, volHeader, catHeader, extHeader, &(volHeader->catalogFile), thisNode, nodeBuf); + status = hfsplus_fetch_node(ctx, volHeader, catHeader, extHeader, &(volHeader->catalogFile), thisNode, nodeBuf, nodeSize); if (status != CL_SUCCESS) { cli_dbgmsg("hfsplus_walk_catalog: node fetch failed.\n"); goto done; @@ -1316,6 +1323,11 @@ stream.next_out = uncompressed_block; extracted_file = true; + + if (stream.avail_in > 0 && Z_STREAM_END == z_ret) { + cli_dbgmsg("hfsplus_walk_catalog: Reached end of stream even though there's still some available bytes left!\n"); + break; + } } } else { if (cli_writen(ofd, &block[streamBeginning ? 1 : 0], readLen - (streamBeginning ? 1 : 0)) != readLen - (streamBeginning ? 1 : 0)) { diff -Nru libclamunrar-1.0.0/libclamav/matcher-ac.c libclamunrar-1.0.3/libclamav/matcher-ac.c --- libclamunrar-1.0.0/libclamav/matcher-ac.c 2022-11-23 18:54:48.000000000 +0000 +++ libclamunrar-1.0.3/libclamav/matcher-ac.c 2023-08-25 21:18:34.000000000 +0000 @@ -2951,6 +2951,7 @@ cli_warnmsg("cli_ac_addsig: cannot use filter for trie\n"); MPOOL_FREE(root->mempool, root->filter); root->filter = NULL; + return CL_EMALFDB; } /* TODO: should this affect maxpatlen? */ diff -Nru libclamunrar-1.0.0/libclamav/matcher-bm.c libclamunrar-1.0.3/libclamav/matcher-bm.c --- libclamunrar-1.0.0/libclamav/matcher-bm.c 2022-11-23 18:54:48.000000000 +0000 +++ libclamunrar-1.0.3/libclamav/matcher-bm.c 2023-08-25 21:18:34.000000000 +0000 @@ -72,6 +72,7 @@ cli_warnmsg("cli_bm_addpatt: cannot use filter for trie\n"); MPOOL_FREE(root->mempool, root->filter); root->filter = NULL; + return CL_EMALFDB; } /* TODO: should this affect maxpatlen? */ } diff -Nru libclamunrar-1.0.0/libclamav/others.h libclamunrar-1.0.3/libclamav/others.h --- libclamunrar-1.0.0/libclamav/others.h 2022-11-23 18:54:48.000000000 +0000 +++ libclamunrar-1.0.3/libclamav/others.h 2023-08-25 21:18:34.000000000 +0000 @@ -73,7 +73,7 @@ * in re-enabling affected modules. */ -#define CL_FLEVEL 160 +#define CL_FLEVEL 163 #define CL_FLEVEL_DCONF CL_FLEVEL #define CL_FLEVEL_SIGTOOL CL_FLEVEL diff -Nru libclamunrar-1.0.0/libclamav/readdb.c libclamunrar-1.0.3/libclamav/readdb.c --- libclamunrar-1.0.0/libclamav/readdb.c 2022-11-23 18:54:48.000000000 +0000 +++ libclamunrar-1.0.3/libclamav/readdb.c 2023-08-25 21:18:34.000000000 +0000 @@ -4862,9 +4862,11 @@ if (fs) fclose(fs); - if (engine->cb_sigload_progress) { - /* Let the progress callback function know how we're doing */ - (void)engine->cb_sigload_progress(engine->num_total_signatures, *signo, engine->cb_sigload_progress_ctx); + if (CL_SUCCESS == ret) { + if (engine->cb_sigload_progress) { + /* Let the progress callback function know how we're doing */ + (void)engine->cb_sigload_progress(engine->num_total_signatures, *signo, engine->cb_sigload_progress_ctx); + } } return ret; diff -Nru libclamunrar-1.0.0/libclamav/rtf.c libclamunrar-1.0.3/libclamav/rtf.c --- libclamunrar-1.0.0/libclamav/rtf.c 2022-11-23 18:54:48.000000000 +0000 +++ libclamunrar-1.0.3/libclamav/rtf.c 2023-08-25 21:18:34.000000000 +0000 @@ -168,9 +168,11 @@ /* grow stack */ struct rtf_state* states; stack->stack_size += 128; - states = cli_realloc2(stack->states, stack->stack_size * sizeof(*stack->states)); - if (!states) + states = cli_realloc(stack->states, stack->stack_size * sizeof(*stack->states)); + if (!states) { + // Realloc failed. 
Note that stack->states has not been freed and must still be cleaned up by the caller. return CL_EMEM; + } stack->states = states; } stack->states[stack->stack_cnt++] = *state; diff -Nru libclamunrar-1.0.0/libclamav/scanners.c libclamunrar-1.0.3/libclamav/scanners.c --- libclamunrar-1.0.0/libclamav/scanners.c 2022-11-23 18:54:48.000000000 +0000 +++ libclamunrar-1.0.3/libclamav/scanners.c 2023-08-25 21:18:34.000000000 +0000 @@ -1632,7 +1632,8 @@ char *hash = NULL; char path[PATH_MAX]; char filename[PATH_MAX]; - int tempfd = -1; + int tempfd = -1; + char *tempfile = NULL; if (CL_SUCCESS != (ret = uniq_get(U, "dir", 3, &hash, &hashcnt))) { cli_dbgmsg("cli_ole2_tempdir_scan_vba_new: uniq_get('dir') failed with ret code (%d)!\n", ret); @@ -1649,7 +1650,7 @@ if (CL_SUCCESS == find_file(filename, dir, path, sizeof(path))) { cli_dbgmsg("cli_ole2_tempdir_scan_vba_new: Found dir file: %s\n", path); - if ((ret = cli_vba_readdir_new(ctx, path, U, hash, hashcnt, &tempfd, has_macros)) != CL_SUCCESS) { + if ((ret = cli_vba_readdir_new(ctx, path, U, hash, hashcnt, &tempfd, has_macros, &tempfile)) != CL_SUCCESS) { // FIXME: Since we only know the stream name of the OLE2 stream, but not its path inside the // OLE2 archive, we don't know if we have the right file. The only thing we can do is // iterate all of them until one succeeds. @@ -1693,6 +1694,14 @@ close(tempfd); tempfd = -1; + + if (tempfile) { + if (!ctx->engine->keeptmp) { + remove(tempfile); + } + free(tempfile); + tempfile = NULL; + } } hashcnt--; @@ -1704,6 +1713,14 @@ tempfd = -1; } + if (tempfile) { + if (!ctx->engine->keeptmp) { + remove(tempfile); + } + free(tempfile); + tempfile = NULL; + } + return ret; } @@ -5471,57 +5488,6 @@ status = cli_magic_scan(&ctx, CL_TYPE_ANY); - // If any alerts occurred, set the output pointer to the "latest" alert signature name. - if (0 < evidence_num_alerts(ctx.evidence)) { - *virname = cli_get_last_virus_str(&ctx); - verdict = CL_VIRUS; - } - - /* - * Report PUA alerts here. - */ - num_potentially_unwanted_indicators = evidence_num_indicators_type( - ctx.evidence, - IndicatorType_PotentiallyUnwanted); - if (0 != num_potentially_unwanted_indicators) { - // We have "potentially unwanted" indicators that would not have been reported yet. - // We may wish to report them now, ... depending .... - - if (ctx.options->general & CL_SCAN_GENERAL_ALLMATCHES) { - // We're in allmatch mode, so report all "potentially unwanted" matches now. - - size_t i; - - for (i = 0; i < num_potentially_unwanted_indicators; i++) { - const char *pua_alert = evidence_get_indicator( - ctx.evidence, - IndicatorType_PotentiallyUnwanted, - i); - - if (NULL != pua_alert) { - // We don't know exactly which layer the alert happened at. - // There's a decent chance it wasn't at this layer, and in that case we wouldn't - // even have access to that file anymore (it's gone!). So we'll pass back -1 for the - // file descriptor rather than using `cli_virus_found_cb() which would pass back - // The top level file descriptor. - if (ctx.engine->cb_virus_found) { - ctx.engine->cb_virus_found( - -1, - pua_alert, - ctx.cb_ctx); - } - } - } - - } else { - // Not allmatch mode. Only want to report one thing... - if (0 == evidence_num_indicators_type(ctx.evidence, IndicatorType_Strong)) { - // And it looks like we haven't reported anything else, so report the last "potentially unwanted" one. 
- cli_virus_found_cb(&ctx, cli_get_last_virus(&ctx)); - } - } - } - #if HAVE_JSON if (ctx.options->general & CL_SCAN_GENERAL_COLLECT_METADATA && (ctx.properties != NULL)) { json_object *jobj; @@ -5617,6 +5583,57 @@ } #endif // HAVE_JSON + // If any alerts occurred, set the output pointer to the "latest" alert signature name. + if (0 < evidence_num_alerts(ctx.evidence)) { + *virname = cli_get_last_virus_str(&ctx); + verdict = CL_VIRUS; + } + + /* + * Report PUA alerts here. + */ + num_potentially_unwanted_indicators = evidence_num_indicators_type( + ctx.evidence, + IndicatorType_PotentiallyUnwanted); + if (0 != num_potentially_unwanted_indicators) { + // We have "potentially unwanted" indicators that would not have been reported yet. + // We may wish to report them now, ... depending .... + + if (ctx.options->general & CL_SCAN_GENERAL_ALLMATCHES) { + // We're in allmatch mode, so report all "potentially unwanted" matches now. + + size_t i; + + for (i = 0; i < num_potentially_unwanted_indicators; i++) { + const char *pua_alert = evidence_get_indicator( + ctx.evidence, + IndicatorType_PotentiallyUnwanted, + i); + + if (NULL != pua_alert) { + // We don't know exactly which layer the alert happened at. + // There's a decent chance it wasn't at this layer, and in that case we wouldn't + // even have access to that file anymore (it's gone!). So we'll pass back -1 for the + // file descriptor rather than using `cli_virus_found_cb() which would pass back + // The top level file descriptor. + if (ctx.engine->cb_virus_found) { + ctx.engine->cb_virus_found( + -1, + pua_alert, + ctx.cb_ctx); + } + } + } + + } else { + // Not allmatch mode. Only want to report one thing... + if (0 == evidence_num_indicators_type(ctx.evidence, IndicatorType_Strong)) { + // And it looks like we haven't reported anything else, so report the last "potentially unwanted" one. + cli_virus_found_cb(&ctx, cli_get_last_virus(&ctx)); + } + } + } + if (verdict != CL_CLEAN) { // Reporting "VIRUS" is more important than reporting and error, // because... unfortunately we can only do one with the current API. diff -Nru libclamunrar-1.0.0/libclamav/vba_extract.c libclamunrar-1.0.3/libclamav/vba_extract.c --- libclamunrar-1.0.0/libclamav/vba_extract.c 2022-11-23 18:54:48.000000000 +0000 +++ libclamunrar-1.0.3/libclamav/vba_extract.c 2023-08-25 21:18:34.000000000 +0000 @@ -358,7 +358,7 @@ * Read a VBA project in an OLE directory. * Contrary to cli_vba_readdir, this function uses the dir file to locate VBA modules. 
*/ -cl_error_t cli_vba_readdir_new(cli_ctx *ctx, const char *dir, struct uniq *U, const char *hash, uint32_t which, int *tempfd, int *has_macros) +cl_error_t cli_vba_readdir_new(cli_ctx *ctx, const char *dir, struct uniq *U, const char *hash, uint32_t which, int *tempfd, int *has_macros, char **tempfile) { cl_error_t ret = CL_SUCCESS; char fullname[1024]; @@ -367,7 +367,6 @@ size_t data_len; size_t data_offset; const char *stream_name = NULL; - char *tempfile = NULL; uint16_t codepage = CODEPAGE_ISO8859_1; unsigned i; char *mbcs_name = NULL, *utf16_name = NULL; @@ -375,7 +374,7 @@ unsigned char *module_data = NULL, *module_data_utf8 = NULL; size_t module_data_size = 0, module_data_utf8_size = 0; - if (dir == NULL || hash == NULL || tempfd == NULL || has_macros == NULL) { + if (dir == NULL || hash == NULL || tempfd == NULL || has_macros == NULL || tempfile == NULL) { return CL_EARG; } @@ -398,12 +397,12 @@ *has_macros = *has_macros + 1; - if ((ret = cli_gentempfd_with_prefix(ctx->sub_tmpdir, "vba_project", &tempfile, tempfd)) != CL_SUCCESS) { + if ((ret = cli_gentempfd_with_prefix(ctx->sub_tmpdir, "vba_project", tempfile, tempfd)) != CL_SUCCESS) { cli_warnmsg("vba_readdir_new: VBA project cannot be dumped to file\n"); goto done; } - cli_dbgmsg("Dumping VBA project from dir %s to file %s\n", fullname, tempfile); + cli_dbgmsg("Dumping VBA project from dir %s to file %s\n", fullname, *tempfile); #define CLI_WRITEN(msg, size) \ do { \ @@ -1305,9 +1304,6 @@ if (stream_name) { free((void *)stream_name); } - if (tempfile) { - free(tempfile); - } if (ret != CL_SUCCESS && *tempfd >= 0) { close(*tempfd); *tempfd = -1; @@ -1972,7 +1968,7 @@ uint32_t len __attribute__((packed)); uint32_t state __attribute__((packed)); uint32_t offset __attribute__((packed)); - } * m; + } *m; const struct macro *n; #ifdef HAVE_PRAGMA_PACK #pragma pack() diff -Nru libclamunrar-1.0.0/libclamav/vba_extract.h libclamunrar-1.0.3/libclamav/vba_extract.h --- libclamunrar-1.0.0/libclamav/vba_extract.h 2022-11-23 18:54:48.000000000 +0000 +++ libclamunrar-1.0.3/libclamav/vba_extract.h 2023-08-25 21:18:34.000000000 +0000 @@ -40,7 +40,7 @@ } vba_project_t; vba_project_t *cli_vba_readdir(const char *dir, struct uniq *U, uint32_t which); -cl_error_t cli_vba_readdir_new(cli_ctx *ctx, const char *dir, struct uniq *U, const char *hash, uint32_t which, int *tempfd, int *has_macros); +cl_error_t cli_vba_readdir_new(cli_ctx *ctx, const char *dir, struct uniq *U, const char *hash, uint32_t which, int *tempfd, int *has_macros, char **tempfile); vba_project_t *cli_wm_readdir(int fd); void cli_free_vba_project(vba_project_t *vba_project); diff -Nru libclamunrar-1.0.0/libclamav/xlm_extract.c libclamunrar-1.0.3/libclamav/xlm_extract.c --- libclamunrar-1.0.0/libclamav/xlm_extract.c 2022-11-23 18:54:48.000000000 +0000 +++ libclamunrar-1.0.3/libclamav/xlm_extract.c 2023-08-25 21:18:34.000000000 +0000 @@ -4994,6 +4994,9 @@ FREE(data); + if (tempfile && !ctx->engine->keeptmp) { + remove(tempfile); + } FREE(tempfile); return status; diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/.cargo-checksum.json libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/.cargo-checksum.json --- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/.cargo-checksum.json 2022-11-23 18:55:06.000000000 +0000 +++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/.cargo-checksum.json 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ 
-{"files":{"COPYING":"01c266bced4a434da0051174d6bee16a4c82cf634e2679b6155d40d75012390f","Cargo.toml":"c9b1b15e299ba4e6ed0d6f25cde30b26b13b6068a7fbd980000c37bca19b0104","DESIGN.md":"64ff45ea2a89d4c32b29af91acb7743a861fcac417cb94fde8e6559405d603b2","LICENSE-MIT":"0f96a83840e146e43c0ec96a22ec1f392e0680e6c1226e6f3ba87e0740af850f","README.md":"5999e5768f5da8ab9b50c016766b5185b4c79936c56bef6d311ddcb0a38c4b94","UNLICENSE":"7e12e5df4bae12cb21581ba157ced20e1986a0508dd10d0e8a4ab9a4cf94e85c","rustfmt.toml":"1ca600239a27401c4a43f363cf3f38183a212affc1f31bff3ae93234bbaec228","src/ahocorasick.rs":"b92c9a65c4ee8029ff5a710aa1514caf838e73072c177dff5375463769f0b1ce","src/automaton.rs":"931af0aad03079bc4f6400d573fce832ce1edeeaf196815a16750d57b54b2183","src/buffer.rs":"dae7ee7c1f846ca9cf115ba4949484000e1837b4fb7311f8d8c9a35011c9c26f","src/byte_frequencies.rs":"2fb85b381c038c1e44ce94294531cdcd339dca48b1e61f41455666e802cbbc9e","src/classes.rs":"99a53a2ed8eea8c13699def90e31dfdff9d0b90572b1db3cb534e3396e7a0ed0","src/dfa.rs":"25e4455b3e179a7e192108d05f3683993456b36e3ebed99f827558c52525b7e6","src/error.rs":"d34c2c9c815df5d9dedc46b4b3ce109cd2cee07825de643f0c574ec960367beb","src/lib.rs":"7a47d4c87f83e0e7ddf0777a71e4858904e73477ce18404cb89e656070e86aef","src/nfa.rs":"3b817b4aa85540e8c0d35aff7ed7cfbab70ec7d2aaa779d63b4f5369bff31ce1","src/packed/api.rs":"df42e7500c94c9de1ac44145a0dd99ea02047e6bba229da12f2575337beebcf0","src/packed/mod.rs":"ad2f8e18996737347a1181a4457387276d139315bcabfc5e34494af0c0319701","src/packed/pattern.rs":"3abf3835d4c4f8a43753c52936a894d819f713f233fc046e19de5ef95200dcce","src/packed/rabinkarp.rs":"ad7d4533f96aed336e29c5553657ae57b0d733ace9c707a6cf4d08d8fc6edee5","src/packed/teddy/README.md":"b4b83fb5afafbbea6cb76fe70f49cc8ced888f682d98abe5ea5773e95d9ec2b0","src/packed/teddy/compile.rs":"aad40b3f93d2c388b409b31fb2795d414a365237789d5b1a7510d97ceb8ce260","src/packed/teddy/mod.rs":"83b52bd80272970ad17234d0db293d17c1710ec582302bf516b203c8edec037e","src/packed/teddy/runtime.rs":"836146e90b320b14fa2c65fe4af7915a41f6fb04408aac5fac731c22ff46adae","src/packed/tests.rs":"b8dc4d3281ecd6d0fa2bf7ef16cf292a467dfdce64e470c7921e983bfa60fee2","src/packed/vector.rs":"ab3c0535fca5f09198d58cbfae44c292aeb3ce44bc92bca36d30dc72963639fc","src/prefilter.rs":"82a3eb6d5c0c3f10bc8d5f57d55d6d14cf4cf21c475bb5253e1921084063b8d7","src/state_id.rs":"519ec8c7bf3fa72103d4c561c193759759f535dca924c9853efe630f406d2029","src/tests.rs":"ee9b85f3c27cb2fba5796e5be8019aafecc13ee9a4f614553f2bc8953f51c6de"},"package":"cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac"} \ No newline at end of file diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/COPYING libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/COPYING --- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/COPYING 2022-11-23 18:55:06.000000000 +0000 +++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/COPYING 1970-01-01 00:00:00.000000000 +0000 @@ -1,3 +0,0 @@ -This project is dual-licensed under the Unlicense and MIT licenses. - -You may use this code under the terms of either license. 
diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/Cargo.toml libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/Cargo.toml --- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/Cargo.toml 2022-11-23 18:55:06.000000000 +0000 +++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/Cargo.toml 1970-01-01 00:00:00.000000000 +0000 @@ -1,50 +0,0 @@ -# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO -# -# When uploading crates to the registry Cargo will automatically -# "normalize" Cargo.toml files for maximal compatibility -# with all versions of Cargo and also rewrite `path` dependencies -# to registry (e.g., crates.io) dependencies. -# -# If you are reading this file be aware that the original Cargo.toml -# will likely look very different (and much more reasonable). -# See Cargo.toml.orig for the original contents. - -[package] -edition = "2018" -name = "aho-corasick" -version = "0.7.20" -authors = ["Andrew Gallant "] -exclude = ["/aho-corasick-debug"] -autotests = false -description = "Fast multiple substring searching." -homepage = "https://github.com/BurntSushi/aho-corasick" -readme = "README.md" -keywords = [ - "string", - "search", - "text", - "aho", - "multi", -] -categories = ["text-processing"] -license = "Unlicense OR MIT" -repository = "https://github.com/BurntSushi/aho-corasick" - -[profile.bench] -debug = true - -[profile.release] -debug = true - -[lib] -name = "aho_corasick" - -[dependencies.memchr] -version = "2.4.0" -default-features = false - -[dev-dependencies] - -[features] -default = ["std"] -std = ["memchr/std"] diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/DESIGN.md libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/DESIGN.md --- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/DESIGN.md 2022-11-23 18:55:06.000000000 +0000 +++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/DESIGN.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,483 +0,0 @@ -This document describes the internal design of this crate, which is an object -lesson in what happens when you take a fairly simple old algorithm like -Aho-Corasick and make it fast and production ready. - -The target audience of this document is Rust programmers that have some -familiarity with string searching, however, one does not need to know the -Aho-Corasick algorithm in order to read this (it is explained below). One -should, however, know what a trie is. (If you don't, go read its Wikipedia -article.) - -The center-piece of this crate is an implementation of Aho-Corasick. On its -own, Aho-Corasick isn't that complicated. The complex pieces come from the -different variants of Aho-Corasick implemented in this crate. Specifically, -they are: - -* Aho-Corasick as an NFA, using dense transitions near the root with sparse - transitions elsewhere. -* Aho-Corasick as a DFA. (An NFA is slower to search, but cheaper to construct - and uses less memory.) - * A DFA with pre-multiplied state identifiers. This saves a multiplication - instruction in the core search loop. - * A DFA with equivalence classes of bytes as the alphabet, instead of the - traditional 256-byte alphabet. This shrinks the size of the DFA in memory, - but adds an extra lookup in the core search loop to map the input byte to - an equivalent class. -* The option to choose how state identifiers are represented, via one of - u8, u16, u32, u64 or usize. This permits creating compact automatons when - matching a small number of patterns. 
-* Supporting "standard" match semantics, along with its overlapping variant, - in addition to leftmost-first and leftmost-longest semantics. The "standard" - semantics are typically what you see in a textbook description of - Aho-Corasick. However, Aho-Corasick is also useful as an optimization in - regex engines, which often use leftmost-first or leftmost-longest semantics. - Thus, it is useful to implement those semantics here. The "standard" and - "leftmost" search algorithms are subtly different, and also require slightly - different construction algorithms. -* Support for ASCII case insensitive matching. -* Support for accelerating searches when the patterns all start with a small - number of fixed bytes. Or alternatively, when the patterns all contain a - small number of rare bytes. (Searching for these bytes uses SIMD vectorized - code courtesy of `memchr`.) -* Transparent support for alternative SIMD vectorized search routines for - smaller number of literals, such as the Teddy algorithm. We called these - "packed" search routines because they use SIMD. They can often be an order of - magnitude faster than just Aho-Corasick, but don't scale as well. -* Support for searching streams. This can reuse most of the underlying code, - but does require careful buffering support. -* Support for anchored searches, which permit efficient `is_prefix` checks for - a large number of patterns. - -When you combine all of this together along with trying to make everything as -fast as possible, what you end up with is entirely too much code with too much -`unsafe`. Alas, I was not smart enough to figure out how to reduce it. Instead, -we will explain it. - - -# Basics - -The fundamental problem this crate is trying to solve is to determine the -occurrences of possibly many patterns in a haystack. The naive way to solve -this is to look for a match for each pattern at each position in the haystack: - - for i in 0..haystack.len(): - for p in patterns.iter(): - if haystack[i..].starts_with(p.bytes()): - return Match(p.id(), i, i + p.bytes().len()) - -Those four lines are effectively all this crate does. The problem with those -four lines is that they are very slow, especially when you're searching for a -large number of patterns. - -While there are many different algorithms available to solve this, a popular -one is Aho-Corasick. It's a common solution because it's not too hard to -implement, scales quite well even when searching for thousands of patterns and -is generally pretty fast. Aho-Corasick does well here because, regardless of -the number of patterns you're searching for, it always visits each byte in the -haystack exactly once. This means, generally speaking, adding more patterns to -an Aho-Corasick automaton does not make it slower. (Strictly speaking, however, -this is not true, since a larger automaton will make less effective use of the -CPU's cache.) - -Aho-Corasick can be succinctly described as a trie with state transitions -between some of the nodes that efficiently instruct the search algorithm to -try matching alternative keys in the automaton. The trick is that these state -transitions are arranged such that each byte of input needs to be inspected -only once. These state transitions are typically called "failure transitions," -because they instruct the searcher (the thing traversing the automaton while -reading from the haystack) what to do when a byte in the haystack does not -correspond to a valid transition in the current state of the trie.
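As an aside, before the failure-transition discussion continues below: the naive four-line scan quoted at the start of the Basics section maps almost directly onto runnable Rust. This sketch only makes the baseline being contrasted concrete; the names are illustrative and nothing like it appears in the crate:

```rust
/// Naive multi-pattern search: try every pattern at every position.
/// Returns (pattern id, start, end) of the first match found, if any.
fn naive_find(
    patterns: &[&[u8]],
    haystack: &[u8],
) -> Option<(usize, usize, usize)> {
    for i in 0..haystack.len() {
        for (id, p) in patterns.iter().enumerate() {
            if haystack[i..].starts_with(p) {
                return Some((id, i, i + p.len()));
            }
        }
    }
    None
}
```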
- -More formally, a failure transition points to a state in the automaton that may -lead to a match whose prefix is a proper suffix of the path traversed through -the trie so far. (If no such proper suffix exists, then the failure transition -points back to the start state of the trie, effectively restarting the search.) -This is perhaps simpler to explain pictorially. For example, let's say we built -an Aho-Corasick automaton with the following patterns: 'abcd' and 'cef'. The -trie looks like this: - - a - S1 - b - S2 - c - S3 - d - S4* - / - S0 - c - S5 - e - S6 - f - S7* - -where states marked with a `*` are match states (meaning, the search algorithm -should stop and report a match to the caller). - -So given this trie, it should be somewhat straight-forward to see how it can -be used to determine whether any particular haystack *starts* with either -`abcd` or `cef`. It's easy to express this in code: - - fn has_prefix(trie: &Trie, haystack: &[u8]) -> bool { - let mut state_id = trie.start(); - // If the empty pattern is in trie, then state_id is a match state. - if trie.is_match(state_id) { - return true; - } - for (i, &b) in haystack.iter().enumerate() { - state_id = match trie.next_state(state_id, b) { - Some(id) => id, - // If there was no transition for this state and byte, then we know - // the haystack does not start with one of the patterns in our trie. - None => return false, - }; - if trie.is_match(state_id) { - return true; - } - } - false - } - -And that's pretty much it. All we do is move through the trie starting with the -bytes at the beginning of the haystack. If we find ourselves in a position -where we can't move, or if we've looked through the entire haystack without -seeing a match state, then we know the haystack does not start with any of the -patterns in the trie. - -The meat of the Aho-Corasick algorithm is in how we add failure transitions to -our trie to keep searching efficient. Specifically, it permits us to not only -check whether a haystack *starts* with any one of a number of patterns, but -rather, whether the haystack contains any of a number of patterns *anywhere* in -the haystack. - -As mentioned before, failure transitions connect a proper suffix of the path -traversed through the trie before, with a path that leads to a match that has a -prefix corresponding to that proper suffix. So in our case, for patterns `abcd` -and `cef`, with a haystack `abcef`, we want to transition to state `S5` (from -the diagram above) from `S3` upon seeing that the byte following `c` is not -`d`. Namely, the proper suffix in this example is `c`, which is a prefix of -`cef`. So the modified diagram looks like this: - - - a - S1 - b - S2 - c - S3 - d - S4* - / / - / ---------------- - / / - S0 - c - S5 - e - S6 - f - S7* - -One thing that isn't shown in this diagram is that *all* states have a failure -transition, but only `S3` has a *non-trivial* failure transition. That is, all -other states have a failure transition back to the start state. So if our -haystack was `abzabcd`, then the searcher would transition back to `S0` after -seeing `z`, which effectively restarts the search. (Because there is no pattern -in our trie that has a prefix of `bz` or `z`.)
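The document defers construction details to `src/nfa.rs`; for orientation, here is the textbook breadth-first construction of those failure transitions over a dense child table. This is a generic sketch, assuming states numbered from 0 (the start state) with `NONE` marking absent children; it does not mirror the crate's actual representation:

```rust
use std::collections::VecDeque;

const NONE: usize = usize::MAX;

/// `next[s][b]` is the trie child of state `s` on byte `b`, or NONE.
/// Returns the classic Aho-Corasick failure link for each state.
fn build_failure_links(next: &[[usize; 256]]) -> Vec<usize> {
    let mut fail = vec![0usize; next.len()];
    let mut queue = VecDeque::new();
    // Depth-1 states always fail back to the start state.
    for b in 0..256 {
        let s = next[0][b];
        if s != NONE {
            queue.push_back(s);
        }
    }
    while let Some(s) = queue.pop_front() {
        for b in 0..256 {
            let t = next[s][b];
            if t == NONE {
                continue;
            }
            // Follow the parent's failure links until some state has a
            // child on `b`; the start state is the fallback of last resort.
            let mut f = fail[s];
            while f != 0 && next[f][b] == NONE {
                f = fail[f];
            }
            fail[t] = if next[f][b] != NONE { next[f][b] } else { 0 };
            queue.push_back(t);
        }
    }
    fail
}
```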
- -The code for traversing this *automaton* or *finite state machine* (it is no -longer just a trie) is not that much different from the `has_prefix` code -above: - - fn contains(fsm: &FiniteStateMachine, haystack: &[u8]) -> bool { - let mut state_id = fsm.start(); - // If the empty pattern is in fsm, then state_id is a match state. - if fsm.is_match(state_id) { - return true; - } - for (i, &b) in haystack.iter().enumerate() { - // While the diagram above doesn't show this, we may wind up needing - // to follow multiple failure transitions before we land on a state - // in which we can advance. Therefore, when searching for the next - // state, we need to loop until we don't see a failure transition. - // - // This loop terminates because the start state has no empty - // transitions. Every transition from the start state either points to - // another state, or loops back to the start state. - loop { - match fsm.next_state(state_id, b) { - Some(id) => { - state_id = id; - break; - } - // Unlike our code above, if there was no transition for this - // state, then we don't quit. Instead, we look for this state's - // failure transition and follow that instead. - None => { - state_id = fsm.next_fail_state(state_id); - } - }; - } - if fsm.is_match(state_id) { - return true; - } - } - false - } - -Other than the complication around traversing failure transitions, this code -is still roughly "traverse the automaton with bytes from the haystack, and quit -when a match is seen." - -And that concludes our section on the basics. While we didn't go deep into -how the automaton is built (see `src/nfa.rs`, which has detailed comments about -that), the basic structure of Aho-Corasick should be reasonably clear. - - -# NFAs and DFAs - -There are generally two types of finite automata: non-deterministic finite -automata (NFA) and deterministic finite automata (DFA). The difference between -them is, principally, that an NFA can be in multiple states at once. This is -typically accomplished by things called _epsilon_ transitions, where one could -move to a new state without consuming any bytes from the input. (The other -mechanism by which NFAs can be in more than one state is where the same byte in -a particular state transitions to multiple distinct states.) In contrast, a DFA -can only ever be in one state at a time. A DFA has no epsilon transitions, and -for any given state, a byte transitions to at most one other state. - -By this formulation, the Aho-Corasick automaton described in the previous -section is an NFA. This is because failure transitions are, effectively, -epsilon transitions. That is, whenever the automaton is in state `S`, it is -actually in the set of states that are reachable by recursively following -failure transitions from `S`. (This means that, for example, the start state -is always active since the start state is reachable via failure transitions -from any state in the automaton.) - -NFAs have a lot of nice properties. They tend to be easier to construct, and -also tend to use less memory. However, their primary downside is that they are -typically slower to execute. For example, the code above showing how to search -with an Aho-Corasick automaton needs to potentially iterate through many -failure transitions for every byte of input. While this is a fairly small -amount of overhead, this can add up, especially if the automaton has a lot of -overlapping patterns with a lot of failure transitions. 
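That per-byte failure-transition loop is exactly what the DFA variant removes: it pre-resolves every failure transition into a dense table, as the text goes on to describe. A sketch of that conversion, building on the hypothetical `next`/`fail` arrays (and `NONE`) from the previous sketch, and assuming state ids were assigned in breadth-first order so that `fail[s] < s` and each row is complete before it is consulted:

```rust
/// Pre-resolve all failure transitions into a dense transition table,
/// so that searching never loops: one input byte, one table lookup.
fn build_dfa(next: &[[usize; 256]], fail: &[usize]) -> Vec<[usize; 256]> {
    let mut dfa = vec![[0usize; 256]; next.len()];
    for s in 0..next.len() {
        for b in 0..256 {
            dfa[s][b] = if next[s][b] != NONE {
                next[s][b]
            } else if s == 0 {
                0 // missing root transitions loop back to the start state
            } else {
                dfa[fail[s]][b] // already complete, since fail[s] < s
            };
        }
    }
    dfa
}
```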
- -A DFA's search code, by contrast, looks like this: - - fn contains(dfa: &DFA, haystack: &[u8]) -> bool { - let mut state_id = dfa.start(); - // If the empty pattern is in dfa, then state_id is a match state. - if dfa.is_match(state_id) { - return true; - } - for (i, &b) in haystack.iter().enumerate() { - // An Aho-Corasick DFA *never* has a missing state that requires - // failure transitions to be followed. One byte of input advances the - // automaton by one state. Always. - state_id = dfa.next_state(state_id, b); - if dfa.is_match(state_id) { - return true; - } - } - false - } - -The search logic here is much simpler than for the NFA, and this tends to -translate into significant performance benefits as well, since there's a lot -less work being done for each byte in the haystack. How is this accomplished? -It's done by pre-following all failure transitions for all states for all bytes -in the alphabet, and then building a single state transition table. Building -this DFA can be much more costly than building the NFA, and use much more -memory, but the better performance can be worth it. - -Users of this crate can actually choose between using an NFA or a DFA. By -default, an NFA is used, because it typically strikes the best balance between -space usage and search performance. But the DFA option is available for cases -where a little extra memory and upfront time building the automaton is okay. -For example, the `AhoCorasick::auto_configure` and -`AhoCorasickBuilder::auto_configure` methods will enable the DFA setting if -there are a small number of patterns. - - -# More DFA tricks - -As described in the previous section, one of the downsides of using a DFA -is that it uses more memory and can take longer to build. One small way of -mitigating these concerns is to map the alphabet used by the automaton into -a smaller space. Typically, the alphabet of a DFA has 256 elements in it: -one element for each possible value that fits into a byte. However, in many -cases, one does not need the full alphabet. For example, if all patterns in an -Aho-Corasick automaton are ASCII letters, then this only uses up 52 distinct -bytes. As far as the automaton is concerned, the rest of the 204 bytes are -indistinguishable from one another: they will never discriminate between a -match or a non-match. Therefore, in cases like that, the alphabet can be shrunk -to just 53 elements. One for each ASCII letter, and then another to serve as a -placeholder for every other unused byte. - -In practice, this library doesn't quite compute the optimal set of equivalence -classes, but it's close enough in most cases. The key idea is that this then -allows the transition table for the DFA to be potentially much smaller. The -downside of doing this, however, is that since the transition table is defined -in terms of this smaller alphabet space, every byte in the haystack must be -re-mapped to this smaller space. This requires an additional 256-byte table. -In practice, this can lead to a small search time hit, but it can be difficult -to measure. Moreover, it can sometimes lead to faster search times for bigger -automata, since it could be the difference between more parts of the automaton -staying in the CPU cache or not. - -One other trick for DFAs employed by this crate is the notion of premultiplying -state identifiers.
Specifically, the normal way to compute the next transition -in a DFA is via the following (assuming that the transition table is laid out -sequentially in memory, in row-major order, where the rows are states): - - next_state_id = dfa.transitions[current_state_id * 256 + current_byte] - -However, since the value `256` is a fixed constant, we can actually premultiply -the state identifiers in the table when we build the table initially. Then, the -next transition computation simply becomes: - - next_state_id = dfa.transitions[current_state_id + current_byte] - -This doesn't seem like much, but when this is being executed for every byte of -input that you're searching, saving that extra multiplication instruction can -add up. - -The same optimization works even when equivalence classes are enabled, as -described above. The only difference is that the premultiplication is by the -total number of equivalence classes instead of 256. - -There isn't much downside to premultiplying state identifiers, other than the -fact that you may need to choose a bigger integer representation than you would -otherwise. For example, if you don't premultiply state identifiers, then an -automaton that uses `u8` as a state identifier can hold up to 256 states. -However, if they are premultiplied, then it can only hold up to -`floor(256 / len(alphabet))` states. Thus premultiplication impacts how compact -your DFA can be. In practice, it's pretty rare to use `u8` as a state -identifier, so premultiplication is usually a good thing to do. - -Both equivalence classes and premultiplication are tuneable parameters via the -`AhoCorasickBuilder` type, and both are enabled by default. - - -# Match semantics - -One of the more interesting things about this implementation of Aho-Corasick -that (as far as this author knows) separates it from other implementations, is -that it natively supports leftmost-first and leftmost-longest match semantics. -Briefly, match semantics refer to the decision procedure by which searching -will disambiguate matches when there are multiple to choose from: - -* **standard** match semantics emits matches as soon as they are detected by - the automaton. This is typically equivalent to the textbook non-overlapping - formulation of Aho-Corasick. -* **leftmost-first** match semantics means that 1) the next match is the match - starting at the leftmost position and 2) among multiple matches starting at - the same leftmost position, the match corresponding to the pattern provided - first by the caller is reported. -* **leftmost-longest** is like leftmost-first, except when there are multiple - matches starting at the same leftmost position, the pattern corresponding to - the longest match is returned. - -(The crate API documentation discusses these differences, with examples, in -more depth on the `MatchKind` type.) - -The reason why supporting these match semantics is important is because it -gives the user more control over the match procedure. For example, -leftmost-first permits users to implement match priority by simply putting the -higher priority patterns first. Leftmost-longest, on the other hand, permits -finding the longest possible match, which might be useful when trying to find -words matching a dictionary. Additionally, regex engines often want to use -Aho-Corasick as an optimization when searching for an alternation of literals. 
-In order to preserve correct match semantics, regex engines typically can't use -the standard textbook definition directly, since regex engines will implement -either leftmost-first (Perl-like) or leftmost-longest (POSIX) match semantics. - -Supporting leftmost semantics requires a couple key changes: - -* Constructing the Aho-Corasick automaton changes a bit in both how the trie is - constructed and how failure transitions are found. Namely, only a subset of - the failure transitions are added. Specifically, only the failure transitions - that either do not occur after a match or do occur after a match but preserve - that match are kept. (More details on this can be found in `src/nfa.rs`.) -* The search algorithm changes slightly. Since we are looking for the leftmost - match, we cannot quit as soon as a match is detected. Instead, after a match - is detected, we must keep searching until either the end of the input or - until a dead state is seen. (Dead states are not used for standard match - semantics. Dead states mean that searching should stop after a match has been - found.) - -Other implementations of Aho-Corasick do support leftmost match semantics, but -they do it with more overhead at search time, or even worse, with a queue of -matches and sophisticated hijinks to disambiguate the matches. While our -construction algorithm becomes a bit more complicated, the correct match -semantics fall out from the structure of the automaton itself. - - -# Overlapping matches - -One of the nice properties of an Aho-Corasick automaton is that it can report -all possible matches, even when they overlap with one another. In this mode, -the match semantics don't matter, since all possible matches are reported. -Overlapping searches work just like regular searches, except the state -identifier at which the previous search left off is carried over to the next -search, so that it can pick up where it left off. If there are additional -matches at that state, then they are reported before resuming the search. - -Enabling leftmost-first or leftmost-longest match semantics causes the -automaton to use a subset of all failure transitions, which means that -overlapping searches cannot be used. Therefore, if leftmost match semantics are -used, attempting to do an overlapping search will panic. Thus, to get -overlapping searches, the caller must use the default standard match semantics. -This behavior was chosen because there are only two alternatives, which were -deemed worse: - -* Compile two automatons internally, one for standard semantics and one for - the semantics requested by the caller (if not standard). -* Create a new type, distinct from the `AhoCorasick` type, which has different - capabilities based on the configuration options. - -The first is untenable because of the amount of memory used by the automaton. -The second increases the complexity of the API too much by adding too many -types that do similar things. It is conceptually much simpler to keep all -searching isolated to a single type. Callers may query whether the automaton -supports overlapping searches via the `AhoCorasick::supports_overlapping` -method. - - -# Stream searching - -Since Aho-Corasick is an automaton, it is possible to do partial searches on -partial parts of the haystack, and then resume that search on subsequent pieces -of the haystack. This is useful when the haystack you're trying to search is -not stored contiguously in memory, or if one does not want to read the entire -haystack into memory at once. 
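In API terms (matching the 0.7-series examples in the README removed below), stream searching is an ordinary find iterator over any `std::io::Read`; the patterns and reader here are made up for illustration:

```rust
use aho_corasick::AhoCorasick;

fn main() -> std::io::Result<()> {
    let patterns = &["Sherlock", "Moriarty", "Watson"];
    // Any std::io::Read works here; a byte slice stands in for a file.
    let reader: &[u8] = b"Watson, come here.";
    let ac = AhoCorasick::new(patterns);
    for result in ac.stream_find_iter(reader) {
        let mat = result?; // each item is an io::Result<Match>
        println!("pattern {} at {}..{}", mat.pattern(), mat.start(), mat.end());
    }
    Ok(())
}
```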
- -Currently, only standard semantics are supported for stream searching. This is -some of the more complicated code in this crate, and is something I would very -much like to improve. In particular, it currently has the restriction that it -must buffer at least enough of the haystack in memory in order to fit the -longest possible match. The difficulty in getting stream searching right is -that the implementation choices (such as the buffer size) often impact what the -API looks like and what it's allowed to do. - - -# Prefilters - -In some cases, Aho-Corasick is not the fastest way to find matches containing -multiple patterns. Sometimes, the search can be accelerated using highly -optimized SIMD routines. For example, consider searching the following -patterns: - - Sherlock - Moriarty - Watson - -It is plausible that it would be much faster to quickly look for occurrences of -the leading bytes, `S`, `M` or `W`, before trying to start searching via the -automaton. Indeed, this is exactly what this crate will do. - -When there are more than three distinct starting bytes, then this crate will -look for three distinct bytes occurring at any position in the patterns, while -preferring bytes that are heuristically determined to be rare over others. For -example: - - Abuzz - Sanchez - Vasquez - Topaz - Waltz - -Here, we have more than 3 distinct starting bytes, but all of the patterns -contain `z`, which is typically a rare byte. In this case, the prefilter will -scan for `z`, back up a bit, and then execute the Aho-Corasick automaton. - -If all of that fails, then a packed multiple substring algorithm will be -attempted. Currently, the only algorithm available for this is Teddy, but more -may be added in the future. Teddy is unlike the above prefilters in that it -confirms its own matches, so when Teddy is active, it might not be necessary -for Aho-Corasick to run at all. (See `Automaton::leftmost_find_at_no_state_imp` -in `src/automaton.rs`.) However, the current Teddy implementation only works -in `x86_64` and when SSSE3 or AVX2 are available, and moreover, only works -_well_ when there are a small number of patterns (say, less than 100). Teddy -also requires the haystack to be of a certain length (more than 16-34 bytes). -When the haystack is shorter than that, Rabin-Karp is used instead. (See -`src/packed/rabinkarp.rs`.) - -There is a more thorough description of Teddy at -[`src/packed/teddy/README.md`](src/packed/teddy/README.md). diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/LICENSE-MIT libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/LICENSE-MIT --- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/LICENSE-MIT 2022-11-23 18:55:06.000000000 +0000 +++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/LICENSE-MIT 1970-01-01 00:00:00.000000000 +0000 @@ -1,21 +0,0 @@ -The MIT License (MIT) - -Copyright (c) 2015 Andrew Gallant - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/README.md libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/README.md --- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/README.md 2022-11-23 18:55:06.000000000 +0000 +++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/README.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,187 +0,0 @@ -aho-corasick -============ -A library for finding occurrences of many patterns at once with SIMD -acceleration in some cases. This library provides multiple pattern -search principally through an implementation of the -[Aho-Corasick algorithm](https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm), -which builds a finite state machine for executing searches in linear time. -Features include case insensitive matching, overlapping matches, fast searching -via SIMD and optional full DFA construction and search & replace in streams. - -[![Build status](https://github.com/BurntSushi/aho-corasick/workflows/ci/badge.svg)](https://github.com/BurntSushi/aho-corasick/actions) -[![crates.io](https://img.shields.io/crates/v/aho-corasick.svg)](https://crates.io/crates/aho-corasick) - -Dual-licensed under MIT or the [UNLICENSE](https://unlicense.org/). - - -### Documentation - -https://docs.rs/aho-corasick - - -### Usage - -Add this to your `Cargo.toml`: - -```toml -[dependencies] -aho-corasick = "0.7" -``` - - -### Example: basic searching - -This example shows how to search for occurrences of multiple patterns -simultaneously. Each match includes the pattern that matched along with the -byte offsets of the match. - -```rust -use aho_corasick::AhoCorasick; - -let patterns = &["apple", "maple", "Snapple"]; -let haystack = "Nobody likes maple in their apple flavored Snapple."; - -let ac = AhoCorasick::new(patterns); -let mut matches = vec![]; -for mat in ac.find_iter(haystack) { - matches.push((mat.pattern(), mat.start(), mat.end())); -} -assert_eq!(matches, vec![ - (1, 13, 18), - (0, 28, 33), - (2, 43, 50), -]); -``` - - -### Example: case insensitivity - -This is like the previous example, but matches `Snapple` case insensitively -using `AhoCorasickBuilder`: - -```rust -use aho_corasick::AhoCorasickBuilder; - -let patterns = &["apple", "maple", "snapple"]; -let haystack = "Nobody likes maple in their apple flavored Snapple."; - -let ac = AhoCorasickBuilder::new() - .ascii_case_insensitive(true) - .build(patterns); -let mut matches = vec![]; -for mat in ac.find_iter(haystack) { - matches.push((mat.pattern(), mat.start(), mat.end())); -} -assert_eq!(matches, vec![ - (1, 13, 18), - (0, 28, 33), - (2, 43, 50), -]); -``` - - -### Example: replacing matches in a stream - -This example shows how to execute a search and replace on a stream without -loading the entire stream into memory first. - -```rust -use aho_corasick::AhoCorasick; - -let patterns = &["fox", "brown", "quick"]; -let replace_with = &["sloth", "grey", "slow"]; - -// In a real example, these might be `std::fs::File`s instead. 
All you need to -// do is supply a pair of `std::io::Read` and `std::io::Write` implementations. -let rdr = "The quick brown fox."; -let mut wtr = vec![]; - -let ac = AhoCorasick::new(patterns); -ac.stream_replace_all(rdr.as_bytes(), &mut wtr, replace_with) - .expect("stream_replace_all failed"); -assert_eq!(b"The slow grey sloth.".to_vec(), wtr); -``` - - -### Example: finding the leftmost first match - -In the textbook description of Aho-Corasick, its formulation is typically -structured such that it reports all possible matches, even when they overlap -with another. In many cases, overlapping matches may not be desired, such as -the case of finding all successive non-overlapping matches like you might with -a standard regular expression. - -Unfortunately the "obvious" way to modify the Aho-Corasick algorithm to do -this doesn't always work in the expected way, since it will report matches as -soon as they are seen. For example, consider matching the regex `Samwise|Sam` -against the text `Samwise`. Most regex engines (that are Perl-like, or -non-POSIX) will report `Samwise` as a match, but the standard Aho-Corasick -algorithm modified for reporting non-overlapping matches will report `Sam`. - -A novel contribution of this library is the ability to change the match -semantics of Aho-Corasick (without additional search time overhead) such that -`Samwise` is reported instead. For example, here's the standard approach: - -```rust -use aho_corasick::AhoCorasick; - -let patterns = &["Samwise", "Sam"]; -let haystack = "Samwise"; - -let ac = AhoCorasick::new(patterns); -let mat = ac.find(haystack).expect("should have a match"); -assert_eq!("Sam", &haystack[mat.start()..mat.end()]); -``` - -And now here's the leftmost-first version, which matches how a Perl-like -regex will work: - -```rust -use aho_corasick::{AhoCorasickBuilder, MatchKind}; - -let patterns = &["Samwise", "Sam"]; -let haystack = "Samwise"; - -let ac = AhoCorasickBuilder::new() - .match_kind(MatchKind::LeftmostFirst) - .build(patterns); -let mat = ac.find(haystack).expect("should have a match"); -assert_eq!("Samwise", &haystack[mat.start()..mat.end()]); -``` - -In addition to leftmost-first semantics, this library also supports -leftmost-longest semantics, which match the POSIX behavior of a regular -expression alternation. See `MatchKind` in the docs for more details. - - -### Minimum Rust version policy - -This crate's minimum supported `rustc` version is `1.41.1`. - -The current policy is that the minimum Rust version required to use this crate -can be increased in minor version updates. For example, if `crate 1.0` requires -Rust 1.20.0, then `crate 1.0.z` for all values of `z` will also require Rust -1.20.0 or newer. However, `crate 1.y` for `y > 0` may require a newer minimum -version of Rust. - -In general, this crate will be conservative with respect to the minimum -supported version of Rust. - - -### FFI bindings - -* [G-Research/ahocorasick_rs](https://github.com/G-Research/ahocorasick_rs/) -is a Python wrapper for this library. - - -### Future work - -Here are some plans for the future: - -* Assuming the current API is sufficient, I'd like to commit to it and release - a `1.0` version of this crate some time in the next 6-12 months. -* Support stream searching with leftmost match semantics. Currently, only - standard match semantics are supported. Getting this right seems possible, - but is tricky since the match state needs to be propagated through multiple - searches. 
(With standard semantics, as soon as a match is seen the search - ends.) diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/UNLICENSE libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/UNLICENSE --- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/UNLICENSE 2022-11-23 18:55:06.000000000 +0000 +++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/UNLICENSE 1970-01-01 00:00:00.000000000 +0000 @@ -1,24 +0,0 @@ -This is free and unencumbered software released into the public domain. - -Anyone is free to copy, modify, publish, use, compile, sell, or -distribute this software, either in source code form or as a compiled -binary, for any purpose, commercial or non-commercial, and by any -means. - -In jurisdictions that recognize copyright laws, the author or authors -of this software dedicate any and all copyright interest in the -software to the public domain. We make this dedication for the benefit -of the public at large and to the detriment of our heirs and -successors. We intend this dedication to be an overt act of -relinquishment in perpetuity of all present and future rights to this -software under copyright law. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -OTHER DEALINGS IN THE SOFTWARE. - -For more information, please refer to <https://unlicense.org> diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/rustfmt.toml libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/rustfmt.toml --- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/rustfmt.toml 2022-11-23 18:55:06.000000000 +0000 +++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/rustfmt.toml 1970-01-01 00:00:00.000000000 +0000 @@ -1,2 +0,0 @@ -max_width = 79 -use_small_heuristics = "max" diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/ahocorasick.rs libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/ahocorasick.rs --- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/ahocorasick.rs 2022-11-23 18:55:06.000000000 +0000 +++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/ahocorasick.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,2141 +0,0 @@ -use std::io; - -use crate::automaton::Automaton; -use crate::buffer::Buffer; -use crate::dfa::{self, DFA}; -use crate::error::Result; -use crate::nfa::{self, NFA}; -use crate::packed; -use crate::prefilter::{Prefilter, PrefilterState}; -use crate::state_id::StateID; -use crate::Match; - -/// An automaton for searching multiple strings in linear time. -/// -/// The `AhoCorasick` type supports a few basic ways of constructing an -/// automaton, including -/// [`AhoCorasick::new`](struct.AhoCorasick.html#method.new) -/// and -/// [`AhoCorasick::new_auto_configured`](struct.AhoCorasick.html#method.new_auto_configured). -/// However, there are a fair number of configurable options that can be set -/// by using -/// [`AhoCorasickBuilder`](struct.AhoCorasickBuilder.html) -/// instead.
Such options include, but are not limited to, how matches are -/// determined, simple case insensitivity, whether to use a DFA or not and -/// various knobs for controlling the space-vs-time trade offs taken when -/// building the automaton. -/// -/// If you aren't sure where to start, try beginning with -/// [`AhoCorasick::new_auto_configured`](struct.AhoCorasick.html#method.new_auto_configured). -/// -/// # Resource usage -/// -/// Aho-Corasick automatons are always constructed in `O(p)` time, where `p` -/// is the combined length of all patterns being searched. With that said, -/// building an automaton can be fairly costly because of high constant -/// factors, particularly when enabling the -/// [DFA](struct.AhoCorasickBuilder.html#method.dfa) -/// option (which is disabled by default). For this reason, it's generally a -/// good idea to build an automaton once and reuse it as much as possible. -/// -/// Aho-Corasick automatons can also use a fair bit of memory. To get a -/// concrete idea of how much memory is being used, try using the -/// [`AhoCorasick::heap_bytes`](struct.AhoCorasick.html#method.heap_bytes) -/// method. -/// -/// # Examples -/// -/// This example shows how to search for occurrences of multiple patterns -/// simultaneously in a case insensitive fashion. Each match includes the -/// pattern that matched along with the byte offsets of the match. -/// -/// ``` -/// use aho_corasick::AhoCorasickBuilder; -/// -/// let patterns = &["apple", "maple", "snapple"]; -/// let haystack = "Nobody likes maple in their apple flavored Snapple."; -/// -/// let ac = AhoCorasickBuilder::new() -/// .ascii_case_insensitive(true) -/// .build(patterns); -/// let mut matches = vec![]; -/// for mat in ac.find_iter(haystack) { -/// matches.push((mat.pattern(), mat.start(), mat.end())); -/// } -/// assert_eq!(matches, vec![ -/// (1, 13, 18), -/// (0, 28, 33), -/// (2, 43, 50), -/// ]); -/// ``` -/// -/// This example shows how to replace matches with some other string: -/// -/// ``` -/// use aho_corasick::AhoCorasick; -/// -/// let patterns = &["fox", "brown", "quick"]; -/// let haystack = "The quick brown fox."; -/// let replace_with = &["sloth", "grey", "slow"]; -/// -/// let ac = AhoCorasick::new(patterns); -/// let result = ac.replace_all(haystack, replace_with); -/// assert_eq!(result, "The slow grey sloth."); -/// ``` -#[derive(Clone, Debug)] -pub struct AhoCorasick { - imp: Imp, - match_kind: MatchKind, -} - -impl AhoCorasick { - /// Create a new Aho-Corasick automaton using the default configuration. - /// - /// The default configuration optimizes for less space usage, but at the - /// expense of longer search times. To change the configuration, use - /// [`AhoCorasickBuilder`](struct.AhoCorasickBuilder.html) - /// for fine-grained control, or - /// [`AhoCorasick::new_auto_configured`](struct.AhoCorasick.html#method.new_auto_configured) - /// for automatic configuration if you aren't sure which settings to pick. - /// - /// This uses the default - /// [`MatchKind::Standard`](enum.MatchKind.html#variant.Standard) - /// match semantics, which reports a match as soon as it is found. This - /// corresponds to the standard match semantics supported by textbook - /// descriptions of the Aho-Corasick algorithm. 
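The resource-usage notes above boil down to: construct once, reuse everywhere, and measure with `heap_bytes`. A minimal editorial sketch of that pattern, assuming the aho-corasick 0.7 API that this hunk removes:

```rust
use aho_corasick::AhoCorasick;

fn main() {
    // Construction is O(p) in the total pattern length but has high
    // constant factors, so build the automaton once...
    let ac = AhoCorasick::new(&["apple", "maple"]);

    // ...and reuse it across many searches.
    for haystack in &["apple pie", "maple syrup", "pear tart"] {
        println!("{}: {}", haystack, ac.is_match(haystack));
    }

    // heap_bytes() reports the approximate heap usage of the automaton.
    println!("automaton heap usage: {} bytes", ac.heap_bytes());
}
```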
- /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::AhoCorasick; - /// - /// let ac = AhoCorasick::new(&[ - /// "foo", "bar", "baz", - /// ]); - /// assert_eq!(Some(1), ac.find("xxx bar xxx").map(|m| m.pattern())); - /// ``` - pub fn new<I, P>(patterns: I) -> AhoCorasick - where - I: IntoIterator<Item = P>, - P: AsRef<[u8]>, - { - AhoCorasickBuilder::new().build(patterns) - } - - /// Build an Aho-Corasick automaton with an automatically determined - /// configuration. - /// - /// Specifically, this requires a slice of patterns instead of an iterator - /// since the configuration is determined by looking at the patterns before - /// constructing the automaton. The idea here is to balance space and time - /// automatically. That is, when searching a small number of patterns, this - /// will attempt to use the fastest possible configuration since the total - /// space required will be small anyway. As the number of patterns grows, - /// this will fall back to slower configurations that use less space. - /// - /// If you want auto configuration but with match semantics different from - /// the default `MatchKind::Standard`, then use - /// [`AhoCorasickBuilder::auto_configure`](struct.AhoCorasickBuilder.html#method.auto_configure). - /// - /// # Examples - /// - /// Basic usage is just like `new`, except you must provide a slice: - /// - /// ``` - /// use aho_corasick::AhoCorasick; - /// - /// let ac = AhoCorasick::new_auto_configured(&[ - /// "foo", "bar", "baz", - /// ]); - /// assert_eq!(Some(1), ac.find("xxx bar xxx").map(|m| m.pattern())); - /// ``` - pub fn new_auto_configured<B>(patterns: &[B]) -> AhoCorasick - where - B: AsRef<[u8]>, - { - AhoCorasickBuilder::new().auto_configure(patterns).build(patterns) - } -} - -impl<S: StateID> AhoCorasick<S> { - /// Returns true if and only if this automaton matches the haystack at any - /// position. - /// - /// `haystack` may be any type that is cheaply convertible to a `&[u8]`. - /// This includes, but is not limited to, `String`, `&str`, `Vec<u8>`, and - /// `&[u8]` itself. - /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::AhoCorasick; - /// - /// let ac = AhoCorasick::new(&[ - /// "foo", "bar", "quux", "baz", - /// ]); - /// assert!(ac.is_match("xxx bar xxx")); - /// assert!(!ac.is_match("xxx qux xxx")); - /// ``` - pub fn is_match<B: AsRef<[u8]>>(&self, haystack: B) -> bool { - self.earliest_find(haystack).is_some() - } - - /// Returns the location of the first detected match in `haystack`. - /// - /// This method has the same behavior regardless of the - /// [`MatchKind`](enum.MatchKind.html) - /// of this automaton. - /// - /// `haystack` may be any type that is cheaply convertible to a `&[u8]`. - /// This includes, but is not limited to, `String`, `&str`, `Vec<u8>`, and - /// `&[u8]` itself. - /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::AhoCorasick; - /// - /// let ac = AhoCorasick::new(&[ - /// "abc", "b", - /// ]); - /// let mat = ac.earliest_find("abcd").expect("should have match"); - /// assert_eq!(1, mat.pattern()); - /// assert_eq!((1, 2), (mat.start(), mat.end())); - /// ``` - pub fn earliest_find<B: AsRef<[u8]>>(&self, haystack: B) -> Option<Match> { - let mut prestate = PrefilterState::new(self.max_pattern_len()); - let mut start = self.imp.start_state(); - self.imp.earliest_find_at( - &mut prestate, - haystack.as_ref(), - 0, - &mut start, - ) - } - - /// Returns the location of the first match according to the match - /// semantics that this automaton was constructed with.
- /// - /// When using `MatchKind::Standard`, this corresponds precisely to the - /// same behavior as - /// [`earliest_find`](struct.AhoCorasick.html#method.earliest_find). - /// Otherwise, match semantics correspond to either - /// [leftmost-first](enum.MatchKind.html#variant.LeftmostFirst) - /// or - /// [leftmost-longest](enum.MatchKind.html#variant.LeftmostLongest). - /// - /// `haystack` may be any type that is cheaply convertible to a `&[u8]`. - /// This includes, but is not limited to, `String`, `&str`, `Vec`, and - /// `&[u8]` itself. - /// - /// # Examples - /// - /// Basic usage, with standard semantics: - /// - /// ``` - /// use aho_corasick::{AhoCorasickBuilder, MatchKind}; - /// - /// let patterns = &["b", "abc", "abcd"]; - /// let haystack = "abcd"; - /// - /// let ac = AhoCorasickBuilder::new() - /// .match_kind(MatchKind::Standard) // default, not necessary - /// .build(patterns); - /// let mat = ac.find(haystack).expect("should have a match"); - /// assert_eq!("b", &haystack[mat.start()..mat.end()]); - /// ``` - /// - /// Now with leftmost-first semantics: - /// - /// ``` - /// use aho_corasick::{AhoCorasickBuilder, MatchKind}; - /// - /// let patterns = &["b", "abc", "abcd"]; - /// let haystack = "abcd"; - /// - /// let ac = AhoCorasickBuilder::new() - /// .match_kind(MatchKind::LeftmostFirst) - /// .build(patterns); - /// let mat = ac.find(haystack).expect("should have a match"); - /// assert_eq!("abc", &haystack[mat.start()..mat.end()]); - /// ``` - /// - /// And finally, leftmost-longest semantics: - /// - /// ``` - /// use aho_corasick::{AhoCorasickBuilder, MatchKind}; - /// - /// let patterns = &["b", "abc", "abcd"]; - /// let haystack = "abcd"; - /// - /// let ac = AhoCorasickBuilder::new() - /// .match_kind(MatchKind::LeftmostLongest) - /// .build(patterns); - /// let mat = ac.find(haystack).expect("should have a match"); - /// assert_eq!("abcd", &haystack[mat.start()..mat.end()]); - /// ``` - pub fn find>(&self, haystack: B) -> Option { - let mut prestate = PrefilterState::new(self.max_pattern_len()); - self.imp.find_at_no_state(&mut prestate, haystack.as_ref(), 0) - } - - /// Returns an iterator of non-overlapping matches, using the match - /// semantics that this automaton was constructed with. - /// - /// `haystack` may be any type that is cheaply convertible to a `&[u8]`. - /// This includes, but is not limited to, `String`, `&str`, `Vec`, and - /// `&[u8]` itself. 
- /// - /// # Examples - /// - /// Basic usage, with standard semantics: - /// - /// ``` - /// use aho_corasick::{AhoCorasickBuilder, MatchKind}; - /// - /// let patterns = &["append", "appendage", "app"]; - /// let haystack = "append the app to the appendage"; - /// - /// let ac = AhoCorasickBuilder::new() - /// .match_kind(MatchKind::Standard) // default, not necessary - /// .build(patterns); - /// let matches: Vec = ac - /// .find_iter(haystack) - /// .map(|mat| mat.pattern()) - /// .collect(); - /// assert_eq!(vec![2, 2, 2], matches); - /// ``` - /// - /// Now with leftmost-first semantics: - /// - /// ``` - /// use aho_corasick::{AhoCorasickBuilder, MatchKind}; - /// - /// let patterns = &["append", "appendage", "app"]; - /// let haystack = "append the app to the appendage"; - /// - /// let ac = AhoCorasickBuilder::new() - /// .match_kind(MatchKind::LeftmostFirst) - /// .build(patterns); - /// let matches: Vec = ac - /// .find_iter(haystack) - /// .map(|mat| mat.pattern()) - /// .collect(); - /// assert_eq!(vec![0, 2, 0], matches); - /// ``` - /// - /// And finally, leftmost-longest semantics: - /// - /// ``` - /// use aho_corasick::{AhoCorasickBuilder, MatchKind}; - /// - /// let patterns = &["append", "appendage", "app"]; - /// let haystack = "append the app to the appendage"; - /// - /// let ac = AhoCorasickBuilder::new() - /// .match_kind(MatchKind::LeftmostLongest) - /// .build(patterns); - /// let matches: Vec = ac - /// .find_iter(haystack) - /// .map(|mat| mat.pattern()) - /// .collect(); - /// assert_eq!(vec![0, 2, 1], matches); - /// ``` - pub fn find_iter<'a, 'b, B: ?Sized + AsRef<[u8]>>( - &'a self, - haystack: &'b B, - ) -> FindIter<'a, 'b, S> { - FindIter::new(self, haystack.as_ref()) - } - - /// Returns an iterator of overlapping matches in the given `haystack`. - /// - /// Overlapping matches can _only_ be detected using - /// `MatchKind::Standard` semantics. If this automaton was constructed with - /// leftmost semantics, then this method will panic. To determine whether - /// this will panic at runtime, use the - /// [`AhoCorasick::supports_overlapping`](struct.AhoCorasick.html#method.supports_overlapping) - /// method. - /// - /// `haystack` may be any type that is cheaply convertible to a `&[u8]`. - /// This includes, but is not limited to, `String`, `&str`, `Vec`, and - /// `&[u8]` itself. - /// - /// # Panics - /// - /// This panics when `AhoCorasick::supports_overlapping` returns `false`. - /// That is, this panics when this automaton's match semantics are not - /// `MatchKind::Standard`. - /// - /// # Examples - /// - /// Basic usage, with standard semantics: - /// - /// ``` - /// use aho_corasick::AhoCorasick; - /// - /// let patterns = &["append", "appendage", "app"]; - /// let haystack = "append the app to the appendage"; - /// - /// let ac = AhoCorasick::new(patterns); - /// let matches: Vec = ac - /// .find_overlapping_iter(haystack) - /// .map(|mat| mat.pattern()) - /// .collect(); - /// assert_eq!(vec![2, 0, 2, 2, 0, 1], matches); - /// ``` - pub fn find_overlapping_iter<'a, 'b, B: ?Sized + AsRef<[u8]>>( - &'a self, - haystack: &'b B, - ) -> FindOverlappingIter<'a, 'b, S> { - FindOverlappingIter::new(self, haystack.as_ref()) - } - - /// Replace all matches with a corresponding value in the `replace_with` - /// slice given. Matches correspond to the same matches as reported by - /// [`find_iter`](struct.AhoCorasick.html#method.find_iter). - /// - /// Replacements are determined by the index of the matching pattern. 
- /// For example, if the pattern with index `2` is found, then it is - /// replaced by `replace_with[2]`. - /// - /// # Panics - /// - /// This panics when `replace_with.len()` does not equal the total number - /// of patterns that are matched by this automaton. - /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::{AhoCorasickBuilder, MatchKind}; - /// - /// let patterns = &["append", "appendage", "app"]; - /// let haystack = "append the app to the appendage"; - /// - /// let ac = AhoCorasickBuilder::new() - /// .match_kind(MatchKind::LeftmostFirst) - /// .build(patterns); - /// let result = ac.replace_all(haystack, &["x", "y", "z"]); - /// assert_eq!("x the z to the xage", result); - /// ``` - pub fn replace_all(&self, haystack: &str, replace_with: &[B]) -> String - where - B: AsRef, - { - assert_eq!( - replace_with.len(), - self.pattern_count(), - "replace_all requires a replacement for every pattern \ - in the automaton" - ); - let mut dst = String::with_capacity(haystack.len()); - self.replace_all_with(haystack, &mut dst, |mat, _, dst| { - dst.push_str(replace_with[mat.pattern()].as_ref()); - true - }); - dst - } - - /// Replace all matches using raw bytes with a corresponding value in the - /// `replace_with` slice given. Matches correspond to the same matches as - /// reported by [`find_iter`](struct.AhoCorasick.html#method.find_iter). - /// - /// Replacements are determined by the index of the matching pattern. - /// For example, if the pattern with index `2` is found, then it is - /// replaced by `replace_with[2]`. - /// - /// # Panics - /// - /// This panics when `replace_with.len()` does not equal the total number - /// of patterns that are matched by this automaton. - /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::{AhoCorasickBuilder, MatchKind}; - /// - /// let patterns = &["append", "appendage", "app"]; - /// let haystack = b"append the app to the appendage"; - /// - /// let ac = AhoCorasickBuilder::new() - /// .match_kind(MatchKind::LeftmostFirst) - /// .build(patterns); - /// let result = ac.replace_all_bytes(haystack, &["x", "y", "z"]); - /// assert_eq!(b"x the z to the xage".to_vec(), result); - /// ``` - pub fn replace_all_bytes( - &self, - haystack: &[u8], - replace_with: &[B], - ) -> Vec - where - B: AsRef<[u8]>, - { - assert_eq!( - replace_with.len(), - self.pattern_count(), - "replace_all_bytes requires a replacement for every pattern \ - in the automaton" - ); - let mut dst = Vec::with_capacity(haystack.len()); - self.replace_all_with_bytes(haystack, &mut dst, |mat, _, dst| { - dst.extend(replace_with[mat.pattern()].as_ref()); - true - }); - dst - } - - /// Replace all matches using a closure called on each match. - /// Matches correspond to the same matches as reported by - /// [`find_iter`](struct.AhoCorasick.html#method.find_iter). - /// - /// The closure accepts three parameters: the match found, the text of - /// the match and a string buffer with which to write the replaced text - /// (if any). If the closure returns `true`, then it continues to the next - /// match. If the closure returns `false`, then searching is stopped. 
- /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::{AhoCorasickBuilder, MatchKind}; - /// - /// let patterns = &["append", "appendage", "app"]; - /// let haystack = "append the app to the appendage"; - /// - /// let ac = AhoCorasickBuilder::new() - /// .match_kind(MatchKind::LeftmostFirst) - /// .build(patterns); - /// let mut result = String::new(); - /// ac.replace_all_with(haystack, &mut result, |mat, _, dst| { - /// dst.push_str(&mat.pattern().to_string()); - /// true - /// }); - /// assert_eq!("0 the 2 to the 0age", result); - /// ``` - /// - /// Stopping the replacement by returning `false` (continued from the - /// example above): - /// - /// ``` - /// # use aho_corasick::{AhoCorasickBuilder, MatchKind}; - /// # let patterns = &["append", "appendage", "app"]; - /// # let haystack = "append the app to the appendage"; - /// # let ac = AhoCorasickBuilder::new() - /// # .match_kind(MatchKind::LeftmostFirst) - /// # .build(patterns); - /// let mut result = String::new(); - /// ac.replace_all_with(haystack, &mut result, |mat, _, dst| { - /// dst.push_str(&mat.pattern().to_string()); - /// mat.pattern() != 2 - /// }); - /// assert_eq!("0 the 2 to the appendage", result); - /// ``` - pub fn replace_all_with( - &self, - haystack: &str, - dst: &mut String, - mut replace_with: F, - ) where - F: FnMut(&Match, &str, &mut String) -> bool, - { - let mut last_match = 0; - for mat in self.find_iter(haystack) { - dst.push_str(&haystack[last_match..mat.start()]); - last_match = mat.end(); - if !replace_with(&mat, &haystack[mat.start()..mat.end()], dst) { - break; - }; - } - dst.push_str(&haystack[last_match..]); - } - - /// Replace all matches using raw bytes with a closure called on each - /// match. Matches correspond to the same matches as reported by - /// [`find_iter`](struct.AhoCorasick.html#method.find_iter). - /// - /// The closure accepts three parameters: the match found, the text of - /// the match and a byte buffer with which to write the replaced text - /// (if any). If the closure returns `true`, then it continues to the next - /// match. If the closure returns `false`, then searching is stopped. 
- /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::{AhoCorasickBuilder, MatchKind}; - /// - /// let patterns = &["append", "appendage", "app"]; - /// let haystack = b"append the app to the appendage"; - /// - /// let ac = AhoCorasickBuilder::new() - /// .match_kind(MatchKind::LeftmostFirst) - /// .build(patterns); - /// let mut result = vec![]; - /// ac.replace_all_with_bytes(haystack, &mut result, |mat, _, dst| { - /// dst.extend(mat.pattern().to_string().bytes()); - /// true - /// }); - /// assert_eq!(b"0 the 2 to the 0age".to_vec(), result); - /// ``` - /// - /// Stopping the replacement by returning `false` (continued from the - /// example above): - /// - /// ``` - /// # use aho_corasick::{AhoCorasickBuilder, MatchKind}; - /// # let patterns = &["append", "appendage", "app"]; - /// # let haystack = b"append the app to the appendage"; - /// # let ac = AhoCorasickBuilder::new() - /// # .match_kind(MatchKind::LeftmostFirst) - /// # .build(patterns); - /// let mut result = vec![]; - /// ac.replace_all_with_bytes(haystack, &mut result, |mat, _, dst| { - /// dst.extend(mat.pattern().to_string().bytes()); - /// mat.pattern() != 2 - /// }); - /// assert_eq!(b"0 the 2 to the appendage".to_vec(), result); - /// ``` - pub fn replace_all_with_bytes<F>( - &self, - haystack: &[u8], - dst: &mut Vec<u8>, - mut replace_with: F, - ) where - F: FnMut(&Match, &[u8], &mut Vec<u8>) -> bool, - { - let mut last_match = 0; - for mat in self.find_iter(haystack) { - dst.extend(&haystack[last_match..mat.start()]); - last_match = mat.end(); - if !replace_with(&mat, &haystack[mat.start()..mat.end()], dst) { - break; - }; - } - dst.extend(&haystack[last_match..]); - } - - /// Returns an iterator of non-overlapping matches in the given - /// stream. Matches correspond to the same matches as reported by - /// [`find_iter`](struct.AhoCorasick.html#method.find_iter). - /// - /// The matches yielded by this iterator use absolute position offsets in - /// the stream given, where the first byte has index `0`. Matches are - /// yielded until the stream is exhausted. - /// - /// Each item yielded by the iterator is an `io::Result<Match>`, where an - /// error is yielded if there was a problem reading from the reader given. - /// - /// When searching a stream, an internal buffer is used. Therefore, callers - /// should avoid providing a buffered reader, if possible. - /// - /// Searching a stream requires that the automaton was built with - /// `MatchKind::Standard` semantics. If this automaton was constructed - /// with leftmost semantics, then this method will panic. To determine - /// whether this will panic at runtime, use the - /// [`AhoCorasick::supports_stream`](struct.AhoCorasick.html#method.supports_stream) - /// method. - /// - /// # Memory usage - /// - /// In general, searching streams will use a constant amount of memory for - /// its internal buffer. The one requirement is that the internal buffer - /// must be at least the size of the longest possible match. In most use - /// cases, the default buffer size will be much larger than any individual - /// match. - /// - /// # Panics - /// - /// This panics when `AhoCorasick::supports_stream` returns `false`. - /// That is, this panics when this automaton's match semantics are not - /// `MatchKind::Standard`. This restriction may be lifted in the future.
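The stream example that follows searches an in-memory slice; here is an editorial sketch of the same call over a real, deliberately unbuffered `std::fs::File`, per the buffering note above (the path is hypothetical):

```rust
use std::fs::File;

use aho_corasick::AhoCorasick;

fn main() -> std::io::Result<()> {
    let ac = AhoCorasick::new(&["append", "appendage", "app"]);
    // Pass the File directly: stream_find_iter buffers internally, so a
    // BufReader on top would only add an extra copy.
    let rdr = File::open("/tmp/haystack.txt")?; // hypothetical input file
    for result in ac.stream_find_iter(rdr) {
        let mat = result?;
        println!("pattern {} at [{}, {})", mat.pattern(), mat.start(), mat.end());
    }
    Ok(())
}
```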
- /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::AhoCorasick; - /// - /// # fn example() -> Result<(), ::std::io::Error> { - /// let patterns = &["append", "appendage", "app"]; - /// let haystack = "append the app to the appendage"; - /// - /// let ac = AhoCorasick::new(patterns); - /// let mut matches = vec![]; - /// for result in ac.stream_find_iter(haystack.as_bytes()) { - /// let mat = result?; - /// matches.push(mat.pattern()); - /// } - /// assert_eq!(vec![2, 2, 2], matches); - /// # Ok(()) }; example().unwrap() - /// ``` - pub fn stream_find_iter<'a, R: io::Read>( - &'a self, - rdr: R, - ) -> StreamFindIter<'a, R, S> { - StreamFindIter::new(self, rdr) - } - - /// Search for and replace all matches of this automaton in - /// the given reader, and write the replacements to the given - /// writer. Matches correspond to the same matches as reported by - /// [`find_iter`](struct.AhoCorasick.html#method.find_iter). - /// - /// Replacements are determined by the index of the matching pattern. - /// For example, if the pattern with index `2` is found, then it is - /// replaced by `replace_with[2]`. - /// - /// After all matches are replaced, the writer is _not_ flushed. - /// - /// If there was a problem reading from the given reader or writing to the - /// given writer, then the corresponding `io::Error` is returned and all - /// replacement is stopped. - /// - /// When searching a stream, an internal buffer is used. Therefore, callers - /// should avoid providing a buffered reader, if possible. However, - /// callers may want to provide a buffered writer. - /// - /// Searching a stream requires that the automaton was built with - /// `MatchKind::Standard` semantics. If this automaton was constructed - /// with leftmost semantics, then this method will panic. To determine - /// whether this will panic at runtime, use the - /// [`AhoCorasick::supports_stream`](struct.AhoCorasick.html#method.supports_stream) - /// method. - /// - /// # Memory usage - /// - /// In general, searching streams will use a constant amount of memory for - /// its internal buffer. The one requirement is that the internal buffer - /// must be at least the size of the longest possible match. In most use - /// cases, the default buffer size will be much larger than any individual - /// match. - /// - /// # Panics - /// - /// This panics when `AhoCorasick::supports_stream` returns `false`. - /// That is, this panics when this automaton's match semantics are not - /// `MatchKind::Standard`. This restriction may be lifted in the future.
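A sketch of `stream_replace_all` wired to real files, following the advice above to leave the reader unbuffered but buffer the writer; the file names are hypothetical, and the writer is flushed explicitly since the method does not flush it:

```rust
use std::fs::File;
use std::io::{BufWriter, Write};

use aho_corasick::AhoCorasick;

fn main() -> std::io::Result<()> {
    let ac = AhoCorasick::new(&["fox", "brown", "quick"]);
    let rdr = File::open("input.txt")?; // left unbuffered on purpose
    let mut wtr = BufWriter::new(File::create("output.txt")?);
    ac.stream_replace_all(rdr, &mut wtr, &["sloth", "grey", "slow"])?;
    // The docs note the writer is _not_ flushed by the method itself.
    wtr.flush()?;
    Ok(())
}
```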
- /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::AhoCorasick; - /// - /// # fn example() -> Result<(), ::std::io::Error> { - /// let patterns = &["fox", "brown", "quick"]; - /// let haystack = "The quick brown fox."; - /// let replace_with = &["sloth", "grey", "slow"]; - /// - /// let ac = AhoCorasick::new(patterns); - /// let mut result = vec![]; - /// ac.stream_replace_all(haystack.as_bytes(), &mut result, replace_with)?; - /// assert_eq!(b"The slow grey sloth.".to_vec(), result); - /// # Ok(()) }; example().unwrap() - /// ``` - pub fn stream_replace_all<R, W, B>( - &self, - rdr: R, - wtr: W, - replace_with: &[B], - ) -> io::Result<()> - where - R: io::Read, - W: io::Write, - B: AsRef<[u8]>, - { - assert_eq!( - replace_with.len(), - self.pattern_count(), - "stream_replace_all requires a replacement for every pattern \ - in the automaton" - ); - self.stream_replace_all_with(rdr, wtr, |mat, _, wtr| { - wtr.write_all(replace_with[mat.pattern()].as_ref()) - }) - } - - /// Search the given reader and replace all matches of this automaton - /// using the given closure. The result is written to the given - /// writer. Matches correspond to the same matches as reported by - /// [`find_iter`](struct.AhoCorasick.html#method.find_iter). - /// - /// The closure accepts three parameters: the match found, the text of - /// the match and the writer with which to write the replaced text (if any). - /// - /// After all matches are replaced, the writer is _not_ flushed. - /// - /// If there was a problem reading from the given reader or writing to the - /// given writer, then the corresponding `io::Error` is returned and all - /// replacement is stopped. - /// - /// When searching a stream, an internal buffer is used. Therefore, callers - /// should avoid providing a buffered reader, if possible. However, - /// callers may want to provide a buffered writer. - /// - /// Searching a stream requires that the automaton was built with - /// `MatchKind::Standard` semantics. If this automaton was constructed - /// with leftmost semantics, then this method will panic. To determine - /// whether this will panic at runtime, use the - /// [`AhoCorasick::supports_stream`](struct.AhoCorasick.html#method.supports_stream) - /// method. - /// - /// # Memory usage - /// - /// In general, searching streams will use a constant amount of memory for - /// its internal buffer. The one requirement is that the internal buffer - /// must be at least the size of the longest possible match. In most use - /// cases, the default buffer size will be much larger than any individual - /// match. - /// - /// # Panics - /// - /// This panics when `AhoCorasick::supports_stream` returns `false`. - /// That is, this panics when this automaton's match semantics are not - /// `MatchKind::Standard`. This restriction may be lifted in the future.
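Because the closure receives the matched bytes themselves, a replacement can be derived from the match rather than looked up in a fixed table. A small sketch of that, assuming the API above:

```rust
use std::io::Write;

use aho_corasick::AhoCorasick;

fn main() -> std::io::Result<()> {
    let ac = AhoCorasick::new(&["fox", "brown", "quick"]);
    let mut out = vec![];
    ac.stream_replace_all_with(
        "The quick brown fox.".as_bytes(),
        &mut out,
        // Upper-case each match in place instead of substituting fixed text.
        |_mat, bytes, wtr| wtr.write_all(&bytes.to_ascii_uppercase()),
    )?;
    assert_eq!(b"The QUICK BROWN FOX.".to_vec(), out);
    Ok(())
}
```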
- /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use std::io::Write; - /// use aho_corasick::AhoCorasick; - /// - /// # fn example() -> Result<(), ::std::io::Error> { - /// let patterns = &["fox", "brown", "quick"]; - /// let haystack = "The quick brown fox."; - /// - /// let ac = AhoCorasick::new(patterns); - /// let mut result = vec![]; - /// ac.stream_replace_all_with( - /// haystack.as_bytes(), - /// &mut result, - /// |mat, _, wtr| { - /// wtr.write_all(mat.pattern().to_string().as_bytes()) - /// }, - /// )?; - /// assert_eq!(b"The 2 1 0.".to_vec(), result); - /// # Ok(()) }; example().unwrap() - /// ``` - pub fn stream_replace_all_with( - &self, - rdr: R, - mut wtr: W, - mut replace_with: F, - ) -> io::Result<()> - where - R: io::Read, - W: io::Write, - F: FnMut(&Match, &[u8], &mut W) -> io::Result<()>, - { - let mut it = StreamChunkIter::new(self, rdr); - while let Some(result) = it.next() { - let chunk = result?; - match chunk { - StreamChunk::NonMatch { bytes, .. } => { - wtr.write_all(bytes)?; - } - StreamChunk::Match { bytes, mat } => { - replace_with(&mat, bytes, &mut wtr)?; - } - } - } - Ok(()) - } - - /// Returns the match kind used by this automaton. - /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::{AhoCorasick, MatchKind}; - /// - /// let ac = AhoCorasick::new(&[ - /// "foo", "bar", "quux", "baz", - /// ]); - /// assert_eq!(&MatchKind::Standard, ac.match_kind()); - /// ``` - pub fn match_kind(&self) -> &MatchKind { - self.imp.match_kind() - } - - /// Returns the length of the longest pattern matched by this automaton. - /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::AhoCorasick; - /// - /// let ac = AhoCorasick::new(&[ - /// "foo", "bar", "quux", "baz", - /// ]); - /// assert_eq!(4, ac.max_pattern_len()); - /// ``` - pub fn max_pattern_len(&self) -> usize { - self.imp.max_pattern_len() - } - - /// Return the total number of patterns matched by this automaton. - /// - /// This includes patterns that may never participate in a match. For - /// example, if - /// [`MatchKind::LeftmostFirst`](enum.MatchKind.html#variant.LeftmostFirst) - /// match semantics are used, and the patterns `Sam` and `Samwise` were - /// used to build the automaton, then `Samwise` can never participate in a - /// match because `Sam` will always take priority. - /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::AhoCorasick; - /// - /// let ac = AhoCorasick::new(&[ - /// "foo", "bar", "baz", - /// ]); - /// assert_eq!(3, ac.pattern_count()); - /// ``` - pub fn pattern_count(&self) -> usize { - self.imp.pattern_count() - } - - /// Returns true if and only if this automaton supports reporting - /// overlapping matches. - /// - /// If this returns false and overlapping matches are requested, then it - /// will result in a panic. - /// - /// Since leftmost matching is inherently incompatible with overlapping - /// matches, only - /// [`MatchKind::Standard`](enum.MatchKind.html#variant.Standard) - /// supports overlapping matches. This is unlikely to change in the future. 
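A sketch of the runtime guard suggested above, so that overlapping iteration is only attempted when `supports_overlapping` allows it:

```rust
use aho_corasick::AhoCorasick;

// Count matches without risking the panic described above: overlapping
// iteration is only valid under MatchKind::Standard semantics.
fn count_matches(ac: &AhoCorasick, haystack: &str) -> usize {
    if ac.supports_overlapping() {
        ac.find_overlapping_iter(haystack).count()
    } else {
        ac.find_iter(haystack).count()
    }
}

fn main() {
    let ac = AhoCorasick::new(&["append", "appendage", "app"]);
    // Standard semantics, so all six overlapping matches are counted.
    assert_eq!(6, count_matches(&ac, "append the app to the appendage"));
}
```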
- /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::{AhoCorasickBuilder, MatchKind}; - /// - /// let ac = AhoCorasickBuilder::new() - /// .match_kind(MatchKind::Standard) - /// .build(&["foo", "bar", "baz"]); - /// assert!(ac.supports_overlapping()); - /// - /// let ac = AhoCorasickBuilder::new() - /// .match_kind(MatchKind::LeftmostFirst) - /// .build(&["foo", "bar", "baz"]); - /// assert!(!ac.supports_overlapping()); - /// ``` - pub fn supports_overlapping(&self) -> bool { - self.match_kind.supports_overlapping() - } - - /// Returns true if and only if this automaton supports stream searching. - /// - /// If this returns false and stream searching (or replacing) is attempted, - /// then it will result in a panic. - /// - /// Currently, only - /// [`MatchKind::Standard`](enum.MatchKind.html#variant.Standard) - /// supports streaming. This may be expanded in the future. - /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::{AhoCorasickBuilder, MatchKind}; - /// - /// let ac = AhoCorasickBuilder::new() - /// .match_kind(MatchKind::Standard) - /// .build(&["foo", "bar", "baz"]); - /// assert!(ac.supports_stream()); - /// - /// let ac = AhoCorasickBuilder::new() - /// .match_kind(MatchKind::LeftmostFirst) - /// .build(&["foo", "bar", "baz"]); - /// assert!(!ac.supports_stream()); - /// ``` - pub fn supports_stream(&self) -> bool { - self.match_kind.supports_stream() - } - - /// Returns the approximate total amount of heap used by this automaton, in - /// units of bytes. - /// - /// # Examples - /// - /// This example shows the difference in heap usage between a few - /// configurations: - /// - /// ```ignore - /// use aho_corasick::{AhoCorasickBuilder, MatchKind}; - /// - /// let ac = AhoCorasickBuilder::new() - /// .dfa(false) // default - /// .build(&["foo", "bar", "baz"]); - /// assert_eq!(10_336, ac.heap_bytes()); - /// - /// let ac = AhoCorasickBuilder::new() - /// .dfa(false) // default - /// .ascii_case_insensitive(true) - /// .build(&["foo", "bar", "baz"]); - /// assert_eq!(10_384, ac.heap_bytes()); - /// - /// let ac = AhoCorasickBuilder::new() - /// .dfa(true) - /// .ascii_case_insensitive(true) - /// .build(&["foo", "bar", "baz"]); - /// assert_eq!(1_248, ac.heap_bytes()); - /// ``` - pub fn heap_bytes(&self) -> usize { - match self.imp { - Imp::NFA(ref nfa) => nfa.heap_bytes(), - Imp::DFA(ref dfa) => dfa.heap_bytes(), - } - } -} - -/// The internal implementation of Aho-Corasick, which is either an NFA or -/// a DFA. The NFA is slower but uses less memory. The DFA is faster but uses -/// more memory. -#[derive(Clone, Debug)] -enum Imp { - NFA(NFA), - DFA(DFA), -} - -impl Imp { - /// Returns the type of match semantics implemented by this automaton. - fn match_kind(&self) -> &MatchKind { - match *self { - Imp::NFA(ref nfa) => nfa.match_kind(), - Imp::DFA(ref dfa) => dfa.match_kind(), - } - } - - /// Returns the identifier of the start state. - fn start_state(&self) -> S { - match *self { - Imp::NFA(ref nfa) => nfa.start_state(), - Imp::DFA(ref dfa) => dfa.start_state(), - } - } - - /// The length, in bytes, of the longest pattern in this automaton. This - /// information is useful for maintaining correct buffer sizes when - /// searching on streams. - fn max_pattern_len(&self) -> usize { - match *self { - Imp::NFA(ref nfa) => nfa.max_pattern_len(), - Imp::DFA(ref dfa) => dfa.max_pattern_len(), - } - } - - /// The total number of patterns added to this automaton. This includes - /// patterns that may never match. 
The maximum matching pattern that can be - /// reported is exactly one less than this number. - fn pattern_count(&self) -> usize { - match *self { - Imp::NFA(ref nfa) => nfa.pattern_count(), - Imp::DFA(ref dfa) => dfa.pattern_count(), - } - } - - /// Returns the prefilter object, if one exists, for the underlying - /// automaton. - fn prefilter(&self) -> Option<&dyn Prefilter> { - match *self { - Imp::NFA(ref nfa) => nfa.prefilter(), - Imp::DFA(ref dfa) => dfa.prefilter(), - } - } - - /// Returns true if and only if we should attempt to use a prefilter. - fn use_prefilter(&self) -> bool { - let p = match self.prefilter() { - None => return false, - Some(p) => p, - }; - !p.looks_for_non_start_of_match() - } - - #[inline(always)] - fn overlapping_find_at( - &self, - prestate: &mut PrefilterState, - haystack: &[u8], - at: usize, - state_id: &mut S, - match_index: &mut usize, - ) -> Option { - match *self { - Imp::NFA(ref nfa) => nfa.overlapping_find_at( - prestate, - haystack, - at, - state_id, - match_index, - ), - Imp::DFA(ref dfa) => dfa.overlapping_find_at( - prestate, - haystack, - at, - state_id, - match_index, - ), - } - } - - #[inline(always)] - fn earliest_find_at( - &self, - prestate: &mut PrefilterState, - haystack: &[u8], - at: usize, - state_id: &mut S, - ) -> Option { - match *self { - Imp::NFA(ref nfa) => { - nfa.earliest_find_at(prestate, haystack, at, state_id) - } - Imp::DFA(ref dfa) => { - dfa.earliest_find_at(prestate, haystack, at, state_id) - } - } - } - - #[inline(always)] - fn find_at_no_state( - &self, - prestate: &mut PrefilterState, - haystack: &[u8], - at: usize, - ) -> Option { - match *self { - Imp::NFA(ref nfa) => nfa.find_at_no_state(prestate, haystack, at), - Imp::DFA(ref dfa) => dfa.find_at_no_state(prestate, haystack, at), - } - } -} - -/// An iterator of non-overlapping matches in a particular haystack. -/// -/// This iterator yields matches according to the -/// [`MatchKind`](enum.MatchKind.html) -/// used by this automaton. -/// -/// This iterator is constructed via the -/// [`AhoCorasick::find_iter`](struct.AhoCorasick.html#method.find_iter) -/// method. -/// -/// The type variable `S` refers to the representation used for state -/// identifiers. (By default, this is `usize`.) -/// -/// The lifetime `'a` refers to the lifetime of the `AhoCorasick` automaton. -/// -/// The lifetime `'b` refers to the lifetime of the haystack being searched. -#[derive(Debug)] -pub struct FindIter<'a, 'b, S: StateID> { - fsm: &'a Imp, - prestate: PrefilterState, - haystack: &'b [u8], - pos: usize, -} - -impl<'a, 'b, S: StateID> FindIter<'a, 'b, S> { - fn new(ac: &'a AhoCorasick, haystack: &'b [u8]) -> FindIter<'a, 'b, S> { - let prestate = PrefilterState::new(ac.max_pattern_len()); - FindIter { fsm: &ac.imp, prestate, haystack, pos: 0 } - } -} - -impl<'a, 'b, S: StateID> Iterator for FindIter<'a, 'b, S> { - type Item = Match; - - fn next(&mut self) -> Option { - if self.pos > self.haystack.len() { - return None; - } - let result = self.fsm.find_at_no_state( - &mut self.prestate, - self.haystack, - self.pos, - ); - let mat = match result { - None => return None, - Some(mat) => mat, - }; - if mat.end() == self.pos { - // If the automaton can match the empty string and if we found an - // empty match, then we need to forcefully move the position. - self.pos += 1; - } else { - self.pos = mat.end(); - } - Some(mat) - } -} - -/// An iterator of overlapping matches in a particular haystack. 
-/// -/// This iterator will report all possible matches in a particular haystack, -/// even when the matches overlap. -/// -/// This iterator is constructed via the -/// [`AhoCorasick::find_overlapping_iter`](struct.AhoCorasick.html#method.find_overlapping_iter) -/// method. -/// -/// The type variable `S` refers to the representation used for state -/// identifiers. (By default, this is `usize`.) -/// -/// The lifetime `'a` refers to the lifetime of the `AhoCorasick` automaton. -/// -/// The lifetime `'b` refers to the lifetime of the haystack being searched. -#[derive(Debug)] -pub struct FindOverlappingIter<'a, 'b, S: StateID> { - fsm: &'a Imp, - prestate: PrefilterState, - haystack: &'b [u8], - pos: usize, - state_id: S, - match_index: usize, -} - -impl<'a, 'b, S: StateID> FindOverlappingIter<'a, 'b, S> { - fn new( - ac: &'a AhoCorasick, - haystack: &'b [u8], - ) -> FindOverlappingIter<'a, 'b, S> { - assert!( - ac.supports_overlapping(), - "automaton does not support overlapping searches" - ); - let prestate = PrefilterState::new(ac.max_pattern_len()); - FindOverlappingIter { - fsm: &ac.imp, - prestate, - haystack, - pos: 0, - state_id: ac.imp.start_state(), - match_index: 0, - } - } -} - -impl<'a, 'b, S: StateID> Iterator for FindOverlappingIter<'a, 'b, S> { - type Item = Match; - - fn next(&mut self) -> Option { - let result = self.fsm.overlapping_find_at( - &mut self.prestate, - self.haystack, - self.pos, - &mut self.state_id, - &mut self.match_index, - ); - match result { - None => return None, - Some(m) => { - self.pos = m.end(); - Some(m) - } - } - } -} - -/// An iterator that reports Aho-Corasick matches in a stream. -/// -/// This iterator yields elements of type `io::Result`, where an error -/// is reported if there was a problem reading from the underlying stream. -/// The iterator terminates only when the underlying stream reaches `EOF`. -/// -/// This iterator is constructed via the -/// [`AhoCorasick::stream_find_iter`](struct.AhoCorasick.html#method.stream_find_iter) -/// method. -/// -/// The type variable `R` refers to the `io::Read` stream that is being read -/// from. -/// -/// The type variable `S` refers to the representation used for state -/// identifiers. (By default, this is `usize`.) -/// -/// The lifetime `'a` refers to the lifetime of the `AhoCorasick` automaton. -#[derive(Debug)] -pub struct StreamFindIter<'a, R, S: StateID> { - it: StreamChunkIter<'a, R, S>, -} - -impl<'a, R: io::Read, S: StateID> StreamFindIter<'a, R, S> { - fn new(ac: &'a AhoCorasick, rdr: R) -> StreamFindIter<'a, R, S> { - StreamFindIter { it: StreamChunkIter::new(ac, rdr) } - } -} - -impl<'a, R: io::Read, S: StateID> Iterator for StreamFindIter<'a, R, S> { - type Item = io::Result; - - fn next(&mut self) -> Option> { - loop { - match self.it.next() { - None => return None, - Some(Err(err)) => return Some(Err(err)), - Some(Ok(StreamChunk::NonMatch { .. })) => {} - Some(Ok(StreamChunk::Match { mat, .. })) => { - return Some(Ok(mat)); - } - } - } - } -} - -/// An iterator over chunks in an underlying reader. Each chunk either -/// corresponds to non-matching bytes or matching bytes, but all bytes from -/// the underlying reader are reported in sequence. There may be an arbitrary -/// number of non-matching chunks before seeing a matching chunk. -/// -/// N.B. This does not actually implement Iterator because we need to borrow -/// from the underlying reader. But conceptually, it's still an iterator. -#[derive(Debug)] -struct StreamChunkIter<'a, R, S: StateID> { - /// The AC automaton. 
- fsm: &'a Imp, - /// State associated with this automaton's prefilter. It is a heuristic - /// for stopping the prefilter if it's deemed ineffective. - prestate: PrefilterState, - /// The source of bytes we read from. - rdr: R, - /// A fixed size buffer. This is what we actually search. There are some - /// invariants around the buffer's size, namely, it must be big enough to - /// contain the longest possible match. - buf: Buffer, - /// The ID of the FSM state we're currently in. - state_id: S, - /// The current position at which to start the next search in `buf`. - search_pos: usize, - /// The absolute position of `search_pos`, where `0` corresponds to the - /// position of the first byte read from `rdr`. - absolute_pos: usize, - /// The ending position of the last StreamChunk that was returned to the - /// caller. This position is used to determine whether we need to emit - /// non-matching bytes before emitting a match. - report_pos: usize, - /// A match that should be reported on the next call. - pending_match: Option, - /// Enabled only when the automaton can match the empty string. When - /// enabled, we need to execute one final search after consuming the - /// reader to find the trailing empty match. - has_empty_match_at_end: bool, -} - -/// A single chunk yielded by the stream chunk iterator. -/// -/// The `'r` lifetime refers to the lifetime of the stream chunk iterator. -#[derive(Debug)] -enum StreamChunk<'r> { - /// A chunk that does not contain any matches. - NonMatch { bytes: &'r [u8] }, - /// A chunk that precisely contains a match. - Match { bytes: &'r [u8], mat: Match }, -} - -impl<'a, R: io::Read, S: StateID> StreamChunkIter<'a, R, S> { - fn new(ac: &'a AhoCorasick, rdr: R) -> StreamChunkIter<'a, R, S> { - assert!( - ac.supports_stream(), - "stream searching is only supported for Standard match semantics" - ); - - let prestate = if ac.imp.use_prefilter() { - PrefilterState::new(ac.max_pattern_len()) - } else { - PrefilterState::disabled() - }; - let buf = Buffer::new(ac.imp.max_pattern_len()); - let state_id = ac.imp.start_state(); - StreamChunkIter { - fsm: &ac.imp, - prestate, - rdr, - buf, - state_id, - absolute_pos: 0, - report_pos: 0, - search_pos: 0, - pending_match: None, - has_empty_match_at_end: ac.is_match(""), - } - } - - fn next(&mut self) -> Option> { - loop { - if let Some(mut mat) = self.pending_match.take() { - let bytes = &self.buf.buffer()[mat.start()..mat.end()]; - self.report_pos = mat.end(); - mat = mat.increment(self.absolute_pos); - return Some(Ok(StreamChunk::Match { bytes, mat })); - } - if self.search_pos >= self.buf.len() { - if let Some(end) = self.unreported() { - let bytes = &self.buf.buffer()[self.report_pos..end]; - self.report_pos = end; - return Some(Ok(StreamChunk::NonMatch { bytes })); - } - if self.buf.len() >= self.buf.min_buffer_len() { - // This is the point at which we roll our buffer, which we - // only do if our buffer has at least the minimum amount of - // bytes in it. Before rolling, we update our various - // positions to be consistent with the buffer after it has - // been rolled. - - self.report_pos -= - self.buf.len() - self.buf.min_buffer_len(); - self.absolute_pos += - self.search_pos - self.buf.min_buffer_len(); - self.search_pos = self.buf.min_buffer_len(); - self.buf.roll(); - } - match self.buf.fill(&mut self.rdr) { - Err(err) => return Some(Err(err)), - Ok(false) => { - // We've hit EOF, but if there are still some - // unreported bytes remaining, return them now. 
- if self.report_pos < self.buf.len() { - let bytes = &self.buf.buffer()[self.report_pos..]; - self.report_pos = self.buf.len(); - - let chunk = StreamChunk::NonMatch { bytes }; - return Some(Ok(chunk)); - } else { - // We've reported everything, but there might still - // be a match at the very last position. - if !self.has_empty_match_at_end { - return None; - } - // fallthrough for another search to get trailing - // empty matches - self.has_empty_match_at_end = false; - } - } - Ok(true) => {} - } - } - let result = self.fsm.earliest_find_at( - &mut self.prestate, - self.buf.buffer(), - self.search_pos, - &mut self.state_id, - ); - match result { - None => { - self.search_pos = self.buf.len(); - } - Some(mat) => { - self.state_id = self.fsm.start_state(); - if mat.end() == self.search_pos { - // If the automaton can match the empty string and if - // we found an empty match, then we need to forcefully - // move the position. - self.search_pos += 1; - } else { - self.search_pos = mat.end(); - } - self.pending_match = Some(mat.clone()); - if self.report_pos < mat.start() { - let bytes = - &self.buf.buffer()[self.report_pos..mat.start()]; - self.report_pos = mat.start(); - - let chunk = StreamChunk::NonMatch { bytes }; - return Some(Ok(chunk)); - } - } - } - } - } - - fn unreported(&self) -> Option { - let end = self.search_pos.saturating_sub(self.buf.min_buffer_len()); - if self.report_pos < end { - Some(end) - } else { - None - } - } -} - -/// A builder for configuring an Aho-Corasick automaton. -#[derive(Clone, Debug)] -pub struct AhoCorasickBuilder { - nfa_builder: nfa::Builder, - dfa_builder: dfa::Builder, - dfa: bool, -} - -impl Default for AhoCorasickBuilder { - fn default() -> AhoCorasickBuilder { - AhoCorasickBuilder::new() - } -} - -impl AhoCorasickBuilder { - /// Create a new builder for configuring an Aho-Corasick automaton. - /// - /// If you don't need fine grained configuration or aren't sure which knobs - /// to set, try using - /// [`AhoCorasick::new_auto_configured`](struct.AhoCorasick.html#method.new_auto_configured) - /// instead. - pub fn new() -> AhoCorasickBuilder { - AhoCorasickBuilder { - nfa_builder: nfa::Builder::new(), - dfa_builder: dfa::Builder::new(), - dfa: false, - } - } - - /// Build an Aho-Corasick automaton using the configuration set on this - /// builder. - /// - /// A builder may be reused to create more automatons. - /// - /// This method will use the default for representing internal state - /// identifiers, which is `usize`. This guarantees that building the - /// automaton will succeed and is generally a good default, but can make - /// the size of the automaton 2-8 times bigger than it needs to be, - /// depending on your target platform. - /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::AhoCorasickBuilder; - /// - /// let patterns = &["foo", "bar", "baz"]; - /// let ac = AhoCorasickBuilder::new() - /// .build(patterns); - /// assert_eq!(Some(1), ac.find("xxx bar xxx").map(|m| m.pattern())); - /// ``` - pub fn build(&self, patterns: I) -> AhoCorasick - where - I: IntoIterator, - P: AsRef<[u8]>, - { - // The builder only returns an error if the chosen state ID - // representation is too small to fit all of the given patterns. In - // this case, since we fix the representation to usize, it will always - // work because it's impossible to overflow usize since the underlying - // storage would OOM long before that happens. 
- self.build_with_size::(patterns) - .expect("usize state ID type should always work") - } - - /// Build an Aho-Corasick automaton using the configuration set on this - /// builder with a specific state identifier representation. This only has - /// an effect when the `dfa` option is enabled. - /// - /// Generally, the choices for a state identifier representation are - /// `u8`, `u16`, `u32`, `u64` or `usize`, with `usize` being the default. - /// The advantage of choosing a smaller state identifier representation - /// is that the automaton produced will be smaller. This might be - /// beneficial for just generally using less space, or might even allow it - /// to fit more of the automaton in your CPU's cache, leading to overall - /// better search performance. - /// - /// Unlike the standard `build` method, this can report an error if the - /// state identifier representation cannot support the size of the - /// automaton. - /// - /// Note that the state identifier representation is determined by the - /// `S` type variable. This requires a type hint of some sort, either - /// by specifying the return type or using the turbofish, e.g., - /// `build_with_size::(...)`. - /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::{AhoCorasick, AhoCorasickBuilder}; - /// - /// # fn example() -> Result<(), ::aho_corasick::Error> { - /// let patterns = &["foo", "bar", "baz"]; - /// let ac: AhoCorasick = AhoCorasickBuilder::new() - /// .build_with_size(patterns)?; - /// assert_eq!(Some(1), ac.find("xxx bar xxx").map(|m| m.pattern())); - /// # Ok(()) }; example().unwrap() - /// ``` - /// - /// Or alternatively, with turbofish: - /// - /// ``` - /// use aho_corasick::AhoCorasickBuilder; - /// - /// # fn example() -> Result<(), ::aho_corasick::Error> { - /// let patterns = &["foo", "bar", "baz"]; - /// let ac = AhoCorasickBuilder::new() - /// .build_with_size::(patterns)?; - /// assert_eq!(Some(1), ac.find("xxx bar xxx").map(|m| m.pattern())); - /// # Ok(()) }; example().unwrap() - /// ``` - pub fn build_with_size( - &self, - patterns: I, - ) -> Result> - where - S: StateID, - I: IntoIterator, - P: AsRef<[u8]>, - { - let nfa = self.nfa_builder.build(patterns)?; - let match_kind = nfa.match_kind().clone(); - let imp = if self.dfa { - let dfa = self.dfa_builder.build(&nfa)?; - Imp::DFA(dfa) - } else { - Imp::NFA(nfa) - }; - Ok(AhoCorasick { imp, match_kind }) - } - - /// Automatically configure the settings on this builder according to the - /// patterns that will be used to construct the automaton. - /// - /// The idea here is to balance space and time automatically. That is, when - /// searching a small number of patterns, this will attempt to use the - /// fastest possible configuration since the total space required will be - /// small anyway. As the number of patterns grows, this will fall back to - /// slower configurations that use less space. - /// - /// This is guaranteed to never set `match_kind`, but any other option may - /// be overridden. - /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::AhoCorasickBuilder; - /// - /// let patterns = &["foo", "bar", "baz"]; - /// let ac = AhoCorasickBuilder::new() - /// .auto_configure(patterns) - /// .build(patterns); - /// assert_eq!(Some(1), ac.find("xxx bar xxx").map(|m| m.pattern())); - /// ``` - pub fn auto_configure>( - &mut self, - patterns: &[B], - ) -> &mut AhoCorasickBuilder { - // N.B. 
Currently we only use the length of `patterns` to make a - // decision here, and could therefore ask for an `ExactSizeIterator` - // instead. But it's conceivable that we might adapt this to look at - // the total number of bytes, which would require a second pass. - // - // The logic here is fairly rudimentary at the moment, but probably - // OK. The idea here is to use the fastest thing possible for a small - // number of patterns. That is, a DFA with no byte classes, since byte - // classes require an extra indirection for every byte searched. With a - // moderate number of patterns, we still want a DFA, but save on both - // space and compilation time by enabling byte classes. Finally, fall - // back to the slower but smaller NFA. - if patterns.len() <= 100 { - // N.B. Using byte classes can actually be faster by improving - // locality, but this only really applies for multi-megabyte - // automata (i.e., automata that don't fit in your CPU's cache). - self.dfa(true); - } else if patterns.len() <= 5000 { - self.dfa(true); - } - self - } - - /// Set the desired match semantics. - /// - /// The default is `MatchKind::Standard`, which corresponds to the match - /// semantics supported by the standard textbook description of the - /// Aho-Corasick algorithm. Namely, matches are reported as soon as they - /// are found. Moreover, this is the only way to get overlapping matches - /// or do stream searching. - /// - /// The other kinds of match semantics that are supported are - /// `MatchKind::LeftmostFirst` and `MatchKind::LeftmostLongest`. The former - /// corresponds to the match you would get if you were to try to match - /// each pattern at each position in the haystack in the same order that - /// you give to the automaton. That is, it returns the leftmost match - /// corresponding to the earliest pattern given to the automaton. The latter - /// corresponds to finding the longest possible match among all leftmost - /// matches. - /// - /// For more details on match semantics, see the - /// [documentation for `MatchKind`](enum.MatchKind.html). - /// - /// # Examples - /// - /// In these examples, we demonstrate the differences between match - /// semantics for a particular set of patterns in a specific order: - /// `b`, `abc`, `abcd`.
- /// - /// Standard semantics: - /// - /// ``` - /// use aho_corasick::{AhoCorasickBuilder, MatchKind}; - /// - /// let patterns = &["b", "abc", "abcd"]; - /// let haystack = "abcd"; - /// - /// let ac = AhoCorasickBuilder::new() - /// .match_kind(MatchKind::Standard) // default, not necessary - /// .build(patterns); - /// let mat = ac.find(haystack).expect("should have a match"); - /// assert_eq!("b", &haystack[mat.start()..mat.end()]); - /// ``` - /// - /// Leftmost-first semantics: - /// - /// ``` - /// use aho_corasick::{AhoCorasickBuilder, MatchKind}; - /// - /// let patterns = &["b", "abc", "abcd"]; - /// let haystack = "abcd"; - /// - /// let ac = AhoCorasickBuilder::new() - /// .match_kind(MatchKind::LeftmostFirst) - /// .build(patterns); - /// let mat = ac.find(haystack).expect("should have a match"); - /// assert_eq!("abc", &haystack[mat.start()..mat.end()]); - /// ``` - /// - /// Leftmost-longest semantics: - /// - /// ``` - /// use aho_corasick::{AhoCorasickBuilder, MatchKind}; - /// - /// let patterns = &["b", "abc", "abcd"]; - /// let haystack = "abcd"; - /// - /// let ac = AhoCorasickBuilder::new() - /// .match_kind(MatchKind::LeftmostLongest) - /// .build(patterns); - /// let mat = ac.find(haystack).expect("should have a match"); - /// assert_eq!("abcd", &haystack[mat.start()..mat.end()]); - /// ``` - pub fn match_kind(&mut self, kind: MatchKind) -> &mut AhoCorasickBuilder { - self.nfa_builder.match_kind(kind); - self - } - - /// Enable anchored mode, which requires all matches to start at the - /// first position in a haystack. - /// - /// This option is disabled by default. - /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::AhoCorasickBuilder; - /// - /// let patterns = &["foo", "bar"]; - /// let haystack = "foobar"; - /// - /// let ac = AhoCorasickBuilder::new() - /// .anchored(true) - /// .build(patterns); - /// assert_eq!(1, ac.find_iter(haystack).count()); - /// ``` - /// - /// When searching for overlapping matches, all matches that start at - /// the beginning of a haystack will be reported: - /// - /// ``` - /// use aho_corasick::AhoCorasickBuilder; - /// - /// let patterns = &["foo", "foofoo"]; - /// let haystack = "foofoo"; - /// - /// let ac = AhoCorasickBuilder::new() - /// .anchored(true) - /// .build(patterns); - /// assert_eq!(2, ac.find_overlapping_iter(haystack).count()); - /// // A non-anchored search would return 3 matches. - /// ``` - pub fn anchored(&mut self, yes: bool) -> &mut AhoCorasickBuilder { - self.nfa_builder.anchored(yes); - self - } - - /// Enable ASCII-aware case insensitive matching. - /// - /// When this option is enabled, searching will be performed without - /// respect to case for ASCII letters (`a-z` and `A-Z`) only. - /// - /// Enabling this option does not change the search algorithm, but it may - /// increase the size of the automaton. - /// - /// **NOTE:** It is unlikely that support for Unicode case folding will - /// be added in the future. The ASCII case works via a simple hack to the - /// underlying automaton, but full Unicode handling requires a fair bit of - /// sophistication. If you do need Unicode handling, you might consider - /// using the [`regex` crate](https://docs.rs/regex) or the lower level - /// [`regex-automata` crate](https://docs.rs/regex-automata). 
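A sketch of the ASCII-only folding boundary described above; the Kelvin-sign probe is an illustrative assumption on my part, not an example taken from the crate's docs:

```rust
use aho_corasick::AhoCorasickBuilder;

fn main() {
    let ac = AhoCorasickBuilder::new()
        .ascii_case_insensitive(true)
        .build(&["k"]);

    // ASCII letters fold as expected...
    assert!(ac.is_match("K"));

    // ...but Unicode case folding is out of scope: U+212A KELVIN SIGN
    // lower-cases to 'k', yet its UTF-8 bytes contain no ASCII 'k'.
    assert!(!ac.is_match("\u{212A}"));
}
```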
- /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::AhoCorasickBuilder; - /// - /// let patterns = &["FOO", "bAr", "BaZ"]; - /// let haystack = "foo bar baz"; - /// - /// let ac = AhoCorasickBuilder::new() - /// .ascii_case_insensitive(true) - /// .build(patterns); - /// assert_eq!(3, ac.find_iter(haystack).count()); - /// ``` - pub fn ascii_case_insensitive( - &mut self, - yes: bool, - ) -> &mut AhoCorasickBuilder { - self.nfa_builder.ascii_case_insensitive(yes); - self - } - - /// Set the limit on how many NFA states use a dense representation for - /// their transitions. - /// - /// A dense representation uses more space, but supports faster access to - /// transitions at search time. Thus, this setting permits the control of a - /// space-versus-time trade-off when using the NFA variant of Aho-Corasick. - /// - /// This limit is expressed in terms of the depth of a state, i.e., the - /// number of transitions from the starting state of the NFA. The idea is - /// that most of the time searching will be spent near the starting state - /// of the automaton, so states near the start state should use a dense - /// representation. States further away from the start state would then use - /// a sparse representation, which uses less space but is slower to access - /// transitions at search time. - /// - /// By default, this is set to a low but non-zero number. - /// - /// This setting has no effect if the `dfa` option is enabled. - pub fn dense_depth(&mut self, depth: usize) -> &mut AhoCorasickBuilder { - self.nfa_builder.dense_depth(depth); - self - } - - /// Compile the standard Aho-Corasick automaton into a deterministic finite - /// automaton (DFA). - /// - /// When this is disabled (which is the default), then a non-deterministic - /// finite automaton (NFA) is used instead. - /// - /// The main benefit to a DFA is that it can execute searches more quickly - /// than an NFA (perhaps 2-4 times as fast). The main drawback is that the - /// DFA uses more space and can take much longer to build. - /// - /// Enabling this option does not change the time complexity for - /// constructing the Aho-Corasick automaton (which is `O(p)` where - /// `p` is the total number of patterns being compiled). Enabling this - /// option does, however, reduce the time complexity of non-overlapping - /// searches from `O(n + p)` to `O(n)`, where `n` is the length of the - /// haystack. - /// - /// In general, it's a good idea to enable this if you're searching a - /// small number of fairly short patterns (~1000), or if you want the - /// fastest possible search without regard to compilation time or space - /// usage. - pub fn dfa(&mut self, yes: bool) -> &mut AhoCorasickBuilder { - self.dfa = yes; - self - } - - /// Enable heuristic prefilter optimizations. - /// - /// When enabled, searching will attempt to quickly skip to match - /// candidates using specialized literal search routines. A prefilter - /// cannot always be used, and is generally treated as a heuristic. It - /// can be useful to disable this if the prefilter is observed to be - /// sub-optimal for a particular workload. - /// - /// This is enabled by default. - pub fn prefilter(&mut self, yes: bool) -> &mut AhoCorasickBuilder { - self.nfa_builder.prefilter(yes); - self - } - - /// Shrink the size of the transition alphabet by mapping bytes to their - /// equivalence classes. This only has an effect when the `dfa` option is - /// enabled.
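Neither `dense_depth` nor `dfa` carries a doc example above, so here is a minimal sketch of how the two knobs select between the NFA and DFA variants on the 0.7-era builder API (the pattern set and the depth value are illustrative only, not recommendations):

```
use aho_corasick::AhoCorasickBuilder;

fn main() {
    let patterns = &["Sherlock", "Holmes", "Watson"];
    // NFA with a denser transition representation near the start state.
    let nfa_based = AhoCorasickBuilder::new()
        .dense_depth(4) // the default is a low but non-zero number
        .build(patterns);
    // Full DFA: faster searches at the cost of space and build time.
    let dfa_based = AhoCorasickBuilder::new()
        .dfa(true)
        .build(patterns);
    // Both variants report the same matches; only performance differs.
    assert_eq!(
        nfa_based.find_iter("Sherlock Holmes").count(),
        dfa_based.find_iter("Sherlock Holmes").count()
    );
}
```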
- /// - /// When enabled, each DFA will use a map from all possible bytes - /// to their corresponding equivalence class. Each equivalence class - /// represents a set of bytes that does not discriminate between a match - /// and a non-match in the DFA. For example, the patterns `bar` and `baz` - /// have at least five equivalence classes: singleton sets of `b`, `a`, `r` - /// and `z`, and a final set that contains every other byte. - /// - /// The advantage of this map is that the size of the transition table can - /// be reduced drastically from `#states * 256 * sizeof(id)` to - /// `#states * k * sizeof(id)` where `k` is the number of equivalence - /// classes. As a result, total space usage can decrease substantially. - /// Moreover, since a smaller alphabet is used, compilation becomes faster - /// as well. - /// - /// The disadvantage of this map is that every byte searched must be - /// passed through this map before it can be used to determine the next - /// transition. This has a small match time performance cost. However, if - /// the DFA is otherwise very large without byte classes, then using byte - /// classes can greatly improve memory locality and thus lead to better - /// overall performance. - /// - /// This option is enabled by default. - #[deprecated( - since = "0.7.16", - note = "not carrying its weight, will be always enabled, see: https://github.com/BurntSushi/aho-corasick/issues/57" - )] - pub fn byte_classes(&mut self, yes: bool) -> &mut AhoCorasickBuilder { - self.dfa_builder.byte_classes(yes); - self - } - - /// Premultiply state identifiers in the transition table. This only has - /// an effect when the `dfa` option is enabled. - /// - /// When enabled, state identifiers are premultiplied to point to their - /// corresponding row in the transition table. That is, given the `i`th - /// state, its corresponding premultiplied identifier is `i * k` where `k` - /// is the alphabet size of the automaton. (The alphabet size is at most - /// 256, but is in practice smaller if byte classes are enabled.) - /// - /// When state identifiers are not premultiplied, then the identifier of - /// the `i`th state is `i`. - /// - /// The advantage of premultiplying state identifiers is that it saves a - /// multiplication instruction per byte when searching with a DFA. This has - /// been observed to lead to a 20% performance benefit in micro-benchmarks. - /// - /// The primary disadvantage of premultiplying state identifiers is - /// that they require a larger integer size to represent. For example, - /// if the DFA has 200 states, then its premultiplied form requires 16 - /// bits to represent every possible state identifier, whereas its - /// non-premultiplied form only requires 8 bits. - /// - /// This option is enabled by default. - #[deprecated( - since = "0.7.16", - note = "not carrying its weight, will be always enabled, see: https://github.com/BurntSushi/aho-corasick/issues/57" - )] - pub fn premultiply(&mut self, yes: bool) -> &mut AhoCorasickBuilder { - self.dfa_builder.premultiply(yes); - self - } -} - -/// A knob for controlling the match semantics of an Aho-Corasick automaton. -/// -/// There are two generally different ways that Aho-Corasick automatons can -/// report matches. The first way is the "standard" approach that results from -/// implementing most textbook explanations of Aho-Corasick. The second way is -/// to report only the leftmost non-overlapping matches.
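Before the match-kind discussion continues, a back-of-the-envelope check of the transition-table arithmetic quoted above for `byte_classes`; the state count, identifier width, and class count here are invented purely for illustration:

```
use std::mem::size_of;

fn main() {
    let states = 10_000usize; // hypothetical DFA size
    let id = size_of::<u32>(); // suppose S = u32
    // Without byte classes: #states * 256 * sizeof(id).
    let full = states * 256 * id;
    // With, say, k = 8 equivalence classes: #states * k * sizeof(id).
    let classed = states * 8 * id;
    assert_eq!(10_240_000, full); // ~10 MB
    assert_eq!(320_000, classed); // ~0.3 MB, a 32x reduction
}
```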
The leftmost approach -/// is in turn split into two different ways of resolving ambiguous matches: -/// leftmost-first and leftmost-longest. -/// -/// The `Standard` match kind is the default and is the only one that supports -/// overlapping matches and stream searching. (Trying to find overlapping -/// or streaming matches using leftmost match semantics will result in a -/// panic.) The `Standard` match kind will report matches as they are seen. -/// When searching for overlapping matches, then all possible matches are -/// reported. When searching for non-overlapping matches, the first match seen -/// is reported. For example, for non-overlapping matches, given the patterns -/// `abcd` and `b` and the subject string `abcdef`, only a match for `b` is -/// reported since it is detected first. The `abcd` match is never reported -/// since it overlaps with the `b` match. -/// -/// In contrast, the leftmost match kind always prefers the leftmost match -/// among all possible matches. Given the same example as above with `abcd` and -/// `b` as patterns and `abcdef` as the subject string, the leftmost match is -/// `abcd` since it begins before the `b` match, even though the `b` match is -/// detected before the `abcd` match. In this case, the `b` match is not -/// reported at all since it overlaps with the `abcd` match. -/// -/// The difference between leftmost-first and leftmost-longest is in how they -/// resolve ambiguous matches when there are multiple leftmost matches to -/// choose from. Leftmost-first always chooses the pattern that was provided -/// earliest, whereas leftmost-longest always chooses the longest matching -/// pattern. For example, given the patterns `a` and `ab` and the subject -/// string `ab`, the leftmost-first match is `a` but the leftmost-longest match -/// is `ab`. Conversely, if the patterns were given in reverse order, i.e., -/// `ab` and `a`, then both the leftmost-first and leftmost-longest matches -/// would be `ab`. Stated differently, the leftmost-first match depends on the -/// order in which the patterns were given to the Aho-Corasick automaton. -/// Because of that, when leftmost-first matching is used, if a pattern `A` -/// that appears before a pattern `B` is a prefix of `B`, then it is impossible -/// to ever observe a match of `B`. -/// -/// If you're not sure which match kind to pick, then stick with the standard -/// kind, which is the default. In particular, if you need overlapping or -/// streaming matches, then you _must_ use the standard kind. The leftmost -/// kinds are useful in specific circumstances. For example, leftmost-first can -/// be very useful as a way to implement match priority based on the order of -/// patterns given and leftmost-longest can be useful for dictionary searching -/// such that only the longest matching words are reported. -/// -/// # Relationship with regular expression alternations -/// -/// Understanding match semantics can be a little tricky, and one easy way -/// to conceptualize non-overlapping matches from an Aho-Corasick automaton -/// is to think about them as a simple alternation of literals in a regular -/// expression. For example, let's say we wanted to match the strings -/// `Sam` and `Samwise`, which would turn into the regex `Sam|Samwise`. It -/// turns out that regular expression engines have two different ways of -/// matching this alternation. The first way, leftmost-longest, is commonly -/// found in POSIX compatible implementations of regular expressions (such as -/// `grep`).
The second way, leftmost-first, is commonly found in backtracking -/// implementations such as Perl. (Some regex engines, such as RE2 and Rust's -/// regex engine, do not use backtracking, but still implement leftmost-first -/// semantics in an effort to match the behavior of dominant backtracking -/// regex engines such as those found in Perl, Ruby, Python, JavaScript and -/// PHP.) -/// -/// That is, when matching `Sam|Samwise` against `Samwise`, a POSIX regex -/// will match `Samwise` because it is the longest possible match, but a -/// Perl-like regex will match `Sam` since it appears earlier in the -/// alternation. Indeed, the regex `Sam|Samwise` in a Perl-like regex engine -/// will never match `Samwise` since `Sam` will always have higher priority. -/// Conversely, matching the regex `Samwise|Sam` against `Samwise` will lead to -/// a match of `Samwise` in both POSIX and Perl-like regexes since `Samwise` is -/// still the longest match, but it also appears earlier than `Sam`. -/// -/// The "standard" match semantics of Aho-Corasick generally don't correspond -/// to the match semantics of any large group of regex implementations, so -/// there's no direct analogy that can be made here. Standard match semantics -/// are generally useful for overlapping matches, or if you just want to see -/// matches as they are detected. -/// -/// The main conclusion to draw from this section is that the match semantics -/// can be tweaked to precisely match either Perl-like regex alternations or -/// POSIX regex alternations. -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub enum MatchKind { - /// Use standard match semantics, which support overlapping matches. When - /// used with non-overlapping matches, matches are reported as they are - /// seen. - Standard, - /// Use leftmost-first match semantics, which reports leftmost matches. - /// When there are multiple possible leftmost matches, the match - /// corresponding to the pattern that appeared earlier when constructing - /// the automaton is reported. - /// - /// This does **not** support overlapping matches or stream searching. If - /// this match kind is used, attempting to find overlapping matches or - /// stream matches will panic. - LeftmostFirst, - /// Use leftmost-longest match semantics, which reports leftmost matches. - /// When there are multiple possible leftmost matches, the longest match - /// is chosen. - /// - /// This does **not** support overlapping matches or stream searching. If - /// this match kind is used, attempting to find overlapping matches or - /// stream matches will panic. - LeftmostLongest, - /// Hints that destructuring should not be exhaustive. - /// - /// This enum may grow additional variants, so this makes sure clients - /// don't count on exhaustive matching. (Otherwise, adding a new variant - /// could break existing code.) - #[doc(hidden)] - __Nonexhaustive, -} - -/// The default match kind is `MatchKind::Standard`. -impl Default for MatchKind { - fn default() -> MatchKind { - MatchKind::Standard - } -} - -impl MatchKind { - fn supports_overlapping(&self) -> bool { - self.is_standard() - } - - fn supports_stream(&self) -> bool { - // TODO: It may be possible to support this. It's hard.
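The `Sam`/`Samwise` behavior described in the `MatchKind` docs above can be checked directly against the public builder API; a small sketch (the haystack is the word `Samwise` itself):

```
use aho_corasick::{AhoCorasickBuilder, MatchKind};

fn main() {
    let patterns = &["Sam", "Samwise"];
    let haystack = "Samwise";
    // Perl-like alternation: `Sam` wins because it was given first.
    let first = AhoCorasickBuilder::new()
        .match_kind(MatchKind::LeftmostFirst)
        .build(patterns)
        .find(haystack)
        .expect("should match");
    assert_eq!("Sam", &haystack[first.start()..first.end()]);
    // POSIX-like alternation: `Samwise` wins because it is longest.
    let longest = AhoCorasickBuilder::new()
        .match_kind(MatchKind::LeftmostLongest)
        .build(patterns)
        .find(haystack)
        .expect("should match");
    assert_eq!("Samwise", &haystack[longest.start()..longest.end()]);
}
```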
- // - // See: https://github.com/rust-lang/regex/issues/425#issuecomment-471367838 - self.is_standard() - } - - pub(crate) fn is_standard(&self) -> bool { - *self == MatchKind::Standard - } - - pub(crate) fn is_leftmost(&self) -> bool { - *self == MatchKind::LeftmostFirst - || *self == MatchKind::LeftmostLongest - } - - pub(crate) fn is_leftmost_first(&self) -> bool { - *self == MatchKind::LeftmostFirst - } - - /// Convert this match kind into a packed match kind. If this match kind - /// corresponds to standard semantics, then this returns None, since - /// packed searching does not support standard semantics. - pub(crate) fn as_packed(&self) -> Option<packed::MatchKind> { - match *self { - MatchKind::Standard => None, - MatchKind::LeftmostFirst => Some(packed::MatchKind::LeftmostFirst), - MatchKind::LeftmostLongest => { - Some(packed::MatchKind::LeftmostLongest) - } - MatchKind::__Nonexhaustive => unreachable!(), - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn oibits() { - use std::panic::{RefUnwindSafe, UnwindSafe}; - - fn assert_send<T: Send>() {} - fn assert_sync<T: Sync>() {} - fn assert_unwind_safe<T: RefUnwindSafe + UnwindSafe>() {} - - assert_send::<AhoCorasick>(); - assert_sync::<AhoCorasick>(); - assert_unwind_safe::<AhoCorasick>(); - assert_send::<AhoCorasickBuilder>(); - assert_sync::<AhoCorasickBuilder>(); - assert_unwind_safe::<AhoCorasickBuilder>(); - } -} diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/automaton.rs libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/automaton.rs --- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/automaton.rs 2022-11-23 18:55:06.000000000 +0000 +++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/automaton.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,573 +0,0 @@ -use crate::ahocorasick::MatchKind; -use crate::prefilter::{self, Candidate, Prefilter, PrefilterState}; -use crate::state_id::{dead_id, fail_id, StateID}; -use crate::Match; - -// NOTE: This trait essentially started as a copy of the same trait from -// regex-automata, with some wording changed since we use this trait for -// NFAs in addition to DFAs in this crate. Additionally, we do not export -// this trait. It's only used internally to reduce code duplication. The -// regex-automata crate needs to expose it because its Regex type is generic -// over implementations of this trait. In this crate, we encapsulate everything -// behind the AhoCorasick type. -// -// This trait is a bit of a mess, but it's not quite clear how to fix it. -// Basically, there are several competing concerns: -// -// * We need performance, so everything effectively needs to get monomorphized. -// * There are several variations on searching Aho-Corasick automatons: -// overlapping, standard and leftmost. Overlapping and standard are somewhat -// combined together below, but there is no real way to combine standard with -// leftmost. Namely, leftmost requires continuing a search even after a match -// is found, in order to correctly disambiguate a match. -// * On top of that, *sometimes* callers want to know which state the automaton -// is in after searching. This is principally useful for overlapping and -// stream searches. However, when callers don't care about this, we really -// do not want to be forced to compute it, since it sometimes requires extra -// work. Thus, there are effectively two copies of leftmost searching: one -// for tracking the state ID and one that doesn't. We should ideally do the -// same for standard searching, but my sanity stopped me. - -// SAFETY RATIONALE: Previously, the code below went to some length to remove -// all bounds checks.
This generally produced tighter assembly and led to -// 20-50% improvements in micro-benchmarks on corpora made up of random -// characters. This somewhat makes sense, since the branch predictor is going -// to be at its worst on random text. -// -// However, using the aho-corasick-debug tool and manually benchmarking -// different inputs, the code *with* bounds checks actually wound up being -// slightly faster: -// -// $ cat input -// Sherlock Holmes -// John Watson -// Professor Moriarty -// Irene Adler -// Mary Watson -// -// $ aho-corasick-debug-safe \ -// input OpenSubtitles2018.raw.sample.en --kind leftmost-first --dfa -// pattern read time: 32.824µs -// automaton build time: 444.687µs -// automaton heap usage: 72392 bytes -// match count: 639 -// count time: 1.809961702s -// -// $ aho-corasick-debug-master \ -// input OpenSubtitles2018.raw.sample.en --kind leftmost-first --dfa -// pattern read time: 31.425µs -// automaton build time: 317.434µs -// automaton heap usage: 72392 bytes -// match count: 639 -// count time: 2.059157705s -// -// I was able to reproduce this result on two different machines (an i5 and -// an i7). Therefore, we go the route of safe code for now. - -/// A trait describing the interface of an Aho-Corasick finite state machine. -/// -/// Every automaton has exactly one fail state, one dead state and exactly one -/// start state. Generally, these correspond to the first, second and third -/// states, respectively. The fail state is always treated as a sentinel. That -/// is, no correct Aho-Corasick automaton will ever transition into the fail -/// state. The dead state, however, can be transitioned into, but only when -/// leftmost-first or leftmost-longest match semantics are enabled and only -/// when at least one match has been observed. -/// -/// Every automaton also has one or more match states, such that -/// `Automaton::is_match_state(id)` returns `true` if and only if `id` -/// corresponds to a match state. -pub trait Automaton { - /// The representation used for state identifiers in this automaton. - /// - /// Typically, this is one of `u8`, `u16`, `u32`, `u64` or `usize`. - type ID: StateID; - - /// The type of matching that should be done. - fn match_kind(&self) -> &MatchKind; - - /// Returns true if and only if this automaton uses anchored searches. - fn anchored(&self) -> bool; - - /// An optional prefilter for quickly skipping to the next candidate match. - /// A prefilter must report at least every match, although it may report - /// positions that do not correspond to a match. That is, it must not allow - /// false negatives, but can allow false positives. - /// - /// Currently, a prefilter only runs when the automaton is in the start - /// state. That is, the position reported by a prefilter should always - /// correspond to the start of a potential match. - fn prefilter(&self) -> Option<&dyn Prefilter>; - - /// Return the identifier of this automaton's start state. - fn start_state(&self) -> Self::ID; - - /// Returns true if and only if the given state identifier refers to a - /// valid state. - fn is_valid(&self, id: Self::ID) -> bool; - - /// Returns true if and only if the given identifier corresponds to a match - /// state. - /// - /// The state ID given must be valid, or else implementors may panic. - fn is_match_state(&self, id: Self::ID) -> bool; - - /// Returns true if and only if the given identifier corresponds to a state - /// that is either the dead state or a match state.
- /// - /// Depending on the implementation of the automaton, this routine can - /// be used to save a branch in the core matching loop. Nevertheless, - /// `is_match_state(id) || id == dead_id()` is always a valid - /// implementation. Indeed, this is the default implementation. - /// - /// The state ID given must be valid, or else implementors may panic. - fn is_match_or_dead_state(&self, id: Self::ID) -> bool { - id == dead_id() || self.is_match_state(id) - } - - /// If the given state is a match state, return the match corresponding - /// to the given match index. `end` must be the ending position of the - /// detected match. If no match exists or if `match_index` exceeds the - /// number of matches in this state, then `None` is returned. - /// - /// The state ID given must be valid, or else implementors may panic. - /// - /// If the given state ID is correct and if the `match_index` is less than - /// the number of matches for that state, then this is guaranteed to return - /// a match. - fn get_match( - &self, - id: Self::ID, - match_index: usize, - end: usize, - ) -> Option<Match>; - - /// Returns the number of matches for the given state. If the given state - /// is not a match state, then this returns 0. - /// - /// The state ID given must be valid, or else implementors must panic. - fn match_count(&self, id: Self::ID) -> usize; - - /// Given the current state that this automaton is in and the next input - /// byte, this method returns the identifier of the next state. The - /// identifier returned must always be valid and may never correspond to - /// the fail state. The returned identifier may, however, point to the - /// dead state. - /// - /// This is not safe so that implementors may look up the next state - /// without memory safety checks such as bounds checks. As such, callers - /// must ensure that the given identifier corresponds to a valid automaton - /// state. Implementors must, in turn, ensure that this routine is safe for - /// all valid state identifiers and for all possible `u8` values. - fn next_state(&self, current: Self::ID, input: u8) -> Self::ID; - - /// Like next_state, but debug_asserts that the underlying - /// implementation never returns a `fail_id()` for the next state. - fn next_state_no_fail(&self, current: Self::ID, input: u8) -> Self::ID { - let next = self.next_state(current, input); - // We should never see a transition to the failure state. - debug_assert!( - next != fail_id(), - "automaton should never return fail_id for next state" - ); - next - } - - /// Execute a search using standard match semantics. - /// - /// This can be used even when the automaton was constructed with leftmost - /// match semantics when you want to find the earliest possible match. This - /// can also be used as part of an overlapping search implementation. - /// - /// N.B. This does not report a match if `state_id` is given as a matching - /// state. As such, this should not be used directly. - #[inline(always)] - fn standard_find_at( - &self, - prestate: &mut PrefilterState, - haystack: &[u8], - at: usize, - state_id: &mut Self::ID, - ) -> Option<Match> { - if let Some(pre) = self.prefilter() { - self.standard_find_at_imp( - prestate, - Some(pre), - haystack, - at, - state_id, - ) - } else { - self.standard_find_at_imp(prestate, None, haystack, at, state_id) - } - } - - // It's important for this to always be inlined. Namely, its only caller - // is standard_find_at, and the inlining should remove the case analysis - // for prefilter scanning when there is no prefilter available.
- #[inline(always)] - fn standard_find_at_imp( - &self, - prestate: &mut PrefilterState, - prefilter: Option<&dyn Prefilter>, - haystack: &[u8], - mut at: usize, - state_id: &mut Self::ID, - ) -> Option<Match> { - while at < haystack.len() { - if let Some(pre) = prefilter { - if prestate.is_effective(at) && *state_id == self.start_state() - { - let c = prefilter::next(prestate, pre, haystack, at) - .into_option(); - match c { - None => return None, - Some(i) => { - at = i; - } - } - } - } - // CORRECTNESS: next_state is correct for all possible u8 values, - // so the only thing we're concerned about is the validity of - // `state_id`. `state_id` either comes from the caller (in which - // case, we assume it is correct), or it comes from the return - // value of next_state, which is guaranteed to be correct. - *state_id = self.next_state_no_fail(*state_id, haystack[at]); - at += 1; - // This routine always quits immediately after seeing a - // match, and since dead states can only come after seeing - // a match, seeing a dead state here is impossible. (Unless - // we have an anchored automaton, in which case, dead states - // are used to stop a search.) - debug_assert!( - *state_id != dead_id() || self.anchored(), - "standard find should never see a dead state" - ); - - if self.is_match_or_dead_state(*state_id) { - return if *state_id == dead_id() { - None - } else { - self.get_match(*state_id, 0, at) - }; - } - } - None - } - - /// Execute a search using leftmost (either first or longest) match - /// semantics. - /// - /// The principal difference between searching with standard semantics and - /// searching with leftmost semantics is that leftmost searching will - /// continue searching even after a match has been found. Once a match - /// is found, the search does not stop until either the haystack has been - /// exhausted or a dead state is observed in the automaton. (Dead states - /// only exist in automatons constructed with leftmost semantics.) That is, - /// we rely on the construction of the automaton to tell us when to quit. - #[inline(never)] - fn leftmost_find_at( - &self, - prestate: &mut PrefilterState, - haystack: &[u8], - at: usize, - state_id: &mut Self::ID, - ) -> Option<Match> { - if let Some(pre) = self.prefilter() { - self.leftmost_find_at_imp( - prestate, - Some(pre), - haystack, - at, - state_id, - ) - } else { - self.leftmost_find_at_imp(prestate, None, haystack, at, state_id) - } - } - - // It's important for this to always be inlined. Namely, its only caller - // is leftmost_find_at, and the inlining should remove the case analysis - // for prefilter scanning when there is no prefilter available. - #[inline(always)] - fn leftmost_find_at_imp( - &self, - prestate: &mut PrefilterState, - prefilter: Option<&dyn Prefilter>, - haystack: &[u8], - mut at: usize, - state_id: &mut Self::ID, - ) -> Option<Match> { - debug_assert!(self.match_kind().is_leftmost()); - if self.anchored() && at > 0 && *state_id == self.start_state() { - return None; - } - let mut last_match = self.get_match(*state_id, 0, at); - while at < haystack.len() { - if let Some(pre) = prefilter { - if prestate.is_effective(at) && *state_id == self.start_state() - { - let c = prefilter::next(prestate, pre, haystack, at) - .into_option(); - match c { - None => return None, - Some(i) => { - at = i; - } - } - } - } - // CORRECTNESS: next_state is correct for all possible u8 values, - // so the only thing we're concerned about is the validity of - // `state_id`.
`state_id` either comes from the caller (in which - // case, we assume it is correct), or it comes from the return - // value of next_state, which is guaranteed to be correct. - *state_id = self.next_state_no_fail(*state_id, haystack[at]); - at += 1; - if self.is_match_or_dead_state(*state_id) { - if *state_id == dead_id() { - // The only way to enter into a dead state is if a match - // has been found, so we assert as much. This is different - // from normal automata, where you might enter a dead state - // if you know a subsequent match will never be found - // (regardless of whether a match has already been found). - // For Aho-Corasick, it is built so that we can match at - // any position, so the possibility of a match always - // exists. - // - // (Unless we have an anchored automaton, in which case, - // dead states are used to stop a search.) - debug_assert!( - last_match.is_some() || self.anchored(), - "dead state should only be seen after match" - ); - return last_match; - } - last_match = self.get_match(*state_id, 0, at); - } - } - last_match - } - - /// This is like leftmost_find_at, but does not need to track a caller - /// provided state id. In other words, the only output of this routine is a - /// match, if one exists. - /// - /// It is regrettable that we need to effectively copy a chunk of - /// implementation twice, but when we don't need to track the state ID, we - /// can allow the prefilter to report matches immediately without having - /// to re-confirm them with the automaton. The re-confirmation step is - /// necessary in leftmost_find_at because tracing through the automaton is - /// the only way to correctly set the state ID. (Perhaps an alternative - /// would be to keep a map from pattern ID to matching state ID, but that - /// complicates the code and still doesn't permit us to defer to the - /// prefilter entirely when possible.) - /// - /// I did try a few things to avoid the code duplication here, but nothing - /// optimized as well as this approach. (In microbenchmarks, there was - /// about a 25% difference.) - #[inline(never)] - fn leftmost_find_at_no_state( - &self, - prestate: &mut PrefilterState, - haystack: &[u8], - at: usize, - ) -> Option<Match> { - if let Some(pre) = self.prefilter() { - self.leftmost_find_at_no_state_imp( - prestate, - Some(pre), - haystack, - at, - ) - } else { - self.leftmost_find_at_no_state_imp(prestate, None, haystack, at) - } - } - - // It's important for this to always be inlined. Namely, its only caller - // is leftmost_find_at_no_state, and the inlining should remove the case - // analysis for prefilter scanning when there is no prefilter available. - #[inline(always)] - fn leftmost_find_at_no_state_imp( - &self, - prestate: &mut PrefilterState, - prefilter: Option<&dyn Prefilter>, - haystack: &[u8], - mut at: usize, - ) -> Option<Match> { - debug_assert!(self.match_kind().is_leftmost()); - if self.anchored() && at > 0 { - return None; - } - // If our prefilter handles confirmation of matches 100% of the - // time, and since we don't need to track state IDs, we can avoid - // Aho-Corasick completely. - if let Some(pre) = prefilter { - // We should never have a prefilter during an anchored search.
- debug_assert!(!self.anchored()); - if !pre.reports_false_positives() { - return match pre.next_candidate(prestate, haystack, at) { - Candidate::None => None, - Candidate::Match(m) => Some(m), - Candidate::PossibleStartOfMatch(_) => unreachable!(), - }; - } - } - - let mut state_id = self.start_state(); - let mut last_match = self.get_match(state_id, 0, at); - while at < haystack.len() { - if let Some(pre) = prefilter { - if prestate.is_effective(at) && state_id == self.start_state() - { - match prefilter::next(prestate, pre, haystack, at) { - Candidate::None => return None, - // Since we aren't tracking a state ID, we can - // quit early once we know we have a match. - Candidate::Match(m) => return Some(m), - Candidate::PossibleStartOfMatch(i) => { - at = i; - } - } - } - } - // CORRECTNESS: next_state is correct for all possible u8 values, - // so the only thing we're concerned about is the validity of - // `state_id`. `state_id` either comes from the caller (in which - // case, we assume it is correct), or it comes from the return - // value of next_state, which is guaranteed to be correct. - state_id = self.next_state_no_fail(state_id, haystack[at]); - at += 1; - if self.is_match_or_dead_state(state_id) { - if state_id == dead_id() { - // The only way to enter into a dead state is if a - // match has been found, so we assert as much. This - // is different from normal automata, where you might - // enter a dead state if you know a subsequent match - // will never be found (regardless of whether a match - // has already been found). For Aho-Corasick, it is - // built so that we can match at any position, so the - // possibility of a match always exists. - // - // (Unless we have an anchored automaton, in which - // case, dead states are used to stop a search.) - debug_assert!( - last_match.is_some() || self.anchored(), - "dead state should only be seen after match" - ); - return last_match; - } - last_match = self.get_match(state_id, 0, at); - } - } - last_match - } - - /// Execute an overlapping search. - /// - /// When executing an overlapping match, the previous state ID in addition - /// to the previous match index should be given. If there are more matches - /// at the given state, then the match is reported and the given index is - /// incremented. - #[inline(always)] - fn overlapping_find_at( - &self, - prestate: &mut PrefilterState, - haystack: &[u8], - at: usize, - state_id: &mut Self::ID, - match_index: &mut usize, - ) -> Option<Match> { - if self.anchored() && at > 0 && *state_id == self.start_state() { - return None; - } - - let match_count = self.match_count(*state_id); - if *match_index < match_count { - // This is guaranteed to return a match since - // match_index < match_count. - let result = self.get_match(*state_id, *match_index, at); - debug_assert!(result.is_some(), "must be a match"); - *match_index += 1; - return result; - } - - *match_index = 0; - match self.standard_find_at(prestate, haystack, at, state_id) { - None => None, - Some(m) => { - *match_index = 1; - Some(m) - } - } - } - - /// Return the earliest match found. This returns as soon as we know that - /// we have a match. As such, this does not necessarily correspond to the - /// leftmost starting match, but rather, the leftmost position at which a - /// match ends.
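At the public-API level, the two behaviors described here surface as `find_overlapping_iter` and (assuming the 0.7-era `earliest_find` method) `earliest_find`; a quick sketch reusing the `abcd`/`b` example from the `MatchKind` docs:

```
use aho_corasick::AhoCorasick;

fn main() {
    let ac = AhoCorasick::new(&["abcd", "b"]);
    let haystack = "abcdef";
    // Overlapping search: resumes after each reported match, so both
    // `b` (ending at 2) and `abcd` (ending at 4) are returned.
    let ends: Vec<usize> =
        ac.find_overlapping_iter(haystack).map(|m| m.end()).collect();
    assert_eq!(vec![2, 4], ends);
    // Earliest match: returns as soon as any match is known, which is
    // the `b` ending at position 2, not the leftmost-starting `abcd`.
    let m = ac.earliest_find(haystack).expect("should match");
    assert_eq!((1, 2), (m.start(), m.end()));
}
```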
- #[inline(always)] - fn earliest_find_at( - &self, - prestate: &mut PrefilterState, - haystack: &[u8], - at: usize, - state_id: &mut Self::ID, - ) -> Option<Match> { - if *state_id == self.start_state() { - if self.anchored() && at > 0 { - return None; - } - if let Some(m) = self.get_match(*state_id, 0, at) { - return Some(m); - } - } - self.standard_find_at(prestate, haystack, at, state_id) - } - - /// A convenience function for finding the next match according to the - /// match semantics of this automaton. For standard match semantics, this - /// finds the earliest match. Otherwise, the leftmost match is found. - #[inline(always)] - fn find_at( - &self, - prestate: &mut PrefilterState, - haystack: &[u8], - at: usize, - state_id: &mut Self::ID, - ) -> Option<Match> { - match *self.match_kind() { - MatchKind::Standard => { - self.earliest_find_at(prestate, haystack, at, state_id) - } - MatchKind::LeftmostFirst | MatchKind::LeftmostLongest => { - self.leftmost_find_at(prestate, haystack, at, state_id) - } - MatchKind::__Nonexhaustive => unreachable!(), - } - } - - /// Like find_at, but does not track state identifiers. This permits some - /// optimizations when a prefilter that confirms its own matches is - /// present. - #[inline(always)] - fn find_at_no_state( - &self, - prestate: &mut PrefilterState, - haystack: &[u8], - at: usize, - ) -> Option<Match> { - match *self.match_kind() { - MatchKind::Standard => { - let mut state = self.start_state(); - self.earliest_find_at(prestate, haystack, at, &mut state) - } - MatchKind::LeftmostFirst | MatchKind::LeftmostLongest => { - self.leftmost_find_at_no_state(prestate, haystack, at) - } - MatchKind::__Nonexhaustive => unreachable!(), - } - } -} diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/buffer.rs libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/buffer.rs --- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/buffer.rs 2022-11-23 18:55:06.000000000 +0000 +++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/buffer.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,132 +0,0 @@ -use std::cmp; -use std::io; -use std::ptr; - -/// The default buffer capacity that we use for the stream buffer. -const DEFAULT_BUFFER_CAPACITY: usize = 8 * (1 << 10); // 8 KB - -/// A fairly simple roll buffer for supporting stream searches. -/// -/// This buffer acts as a temporary place to store a fixed amount of data when -/// reading from a stream. Its central purpose is to allow "rolling" some -/// suffix of the data to the beginning of the buffer before refilling it with -/// more data from the stream. For example, let's say we are trying to match -/// "foobar" on a stream. When we report the match, we'd like to not only -/// report the correct offsets at which the match occurs, but also the matching -/// bytes themselves. So let's say our stream is a file with the following -/// contents: `test test foobar test test`. Now assume that we happen to read -/// the aforementioned file in two chunks: `test test foo` and `bar test test`. -/// Naively, it would not be possible to report a single contiguous `foobar` -/// match, but this roll buffer allows us to do that. Namely, after the second -/// read, the contents of the buffer should be `st foobar test test`, where the -/// search should ultimately resume immediately after `foo`. (The prefix `st ` -/// is included because the roll buffer saves N bytes at the end of the buffer, -/// where N is the maximum possible length of a match.)
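The `foobar`-across-chunks scenario in this comment is exactly what the public `stream_find_iter` relies on the roll buffer for; a minimal sketch using an in-memory reader in place of a file:

```
use aho_corasick::AhoCorasick;
use std::io::Cursor;

fn main() -> std::io::Result<()> {
    let ac = AhoCorasick::new(&["foobar"]);
    // Any `io::Read` works here; a cursor stands in for a real file.
    let rdr = Cursor::new("test test foobar test test".as_bytes());
    let mut ends = Vec::new();
    for result in ac.stream_find_iter(rdr) {
        ends.push(result?.end());
    }
    // One contiguous match, even if reads split it across chunks.
    assert_eq!(vec![16], ends);
    Ok(())
}
```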
-/// - /// A lot of the logic for dealing with this is unfortunately split out between -/// this roll buffer and the `StreamChunkIter`. -#[derive(Debug)] -pub struct Buffer { - /// The raw buffer contents. This has a fixed size and never increases. - buf: Vec<u8>, - /// The minimum size of the buffer, which is equivalent to the maximum - /// possible length of a match. This corresponds to the amount that we - /// roll. - min: usize, - /// The end of the contents of this buffer. - end: usize, -} - -impl Buffer { - /// Create a new buffer for stream searching. The minimum buffer length - /// given should be the size of the maximum possible match length. - pub fn new(min_buffer_len: usize) -> Buffer { - let min = cmp::max(1, min_buffer_len); - // The minimum buffer amount is also the amount that we roll our - // buffer in order to support incremental searching. To this end, - // our actual capacity needs to be at least 1 byte bigger than our - // minimum amount, otherwise we won't have any overlap. In actuality, - // we want our buffer to be a bit bigger than that for performance - // reasons, so we set a lower bound of `8 * min`. - // - // TODO: It would be good to find a way to test the streaming - // implementation with the minimal buffer size. For now, we just - // uncomment the next line and comment out the subsequent line. - // let capacity = 1 + min; - let capacity = cmp::max(min * 8, DEFAULT_BUFFER_CAPACITY); - Buffer { buf: vec![0; capacity], min, end: 0 } - } - - /// Return the contents of this buffer. - #[inline] - pub fn buffer(&self) -> &[u8] { - &self.buf[..self.end] - } - - /// Return the minimum size of the buffer. The only way a buffer may be - /// smaller than this is if the stream itself contains less than the - /// minimum buffer amount. - #[inline] - pub fn min_buffer_len(&self) -> usize { - self.min - } - - /// Return the total length of the contents in the buffer. - #[inline] - pub fn len(&self) -> usize { - self.end - } - - /// Return all free capacity in this buffer. - fn free_buffer(&mut self) -> &mut [u8] { - &mut self.buf[self.end..] - } - - /// Refill the contents of this buffer by reading as much as possible into - /// this buffer's free capacity. If no more bytes could be read, then this - /// returns false. Otherwise, this reads until it has filled the buffer - /// past the minimum amount. - pub fn fill<R: io::Read>(&mut self, mut rdr: R) -> io::Result<bool> { - let mut readany = false; - loop { - let readlen = rdr.read(self.free_buffer())?; - if readlen == 0 { - return Ok(readany); - } - readany = true; - self.end += readlen; - if self.len() >= self.min { - return Ok(true); - } - } - } - - /// Roll the contents of the buffer so that the suffix of this buffer is - /// moved to the front and all other contents are dropped. The size of the - /// suffix corresponds precisely to the minimum buffer length. - /// - /// This should only be called when the entire contents of this buffer have - /// been searched. - pub fn roll(&mut self) { - let roll_start = self - .end - .checked_sub(self.min) - .expect("buffer capacity should be bigger than minimum amount"); - let roll_len = self.min; - - assert!(roll_start + roll_len <= self.end); - unsafe { - // SAFETY: A buffer contains Copy data, so there's no problem - // moving it around. Safety also depends on our indices being in - // bounds, which they always should be, given the assert above. - // - // TODO: Switch to [T]::copy_within once our MSRV is high enough.
- ptr::copy( - self.buf[roll_start..].as_ptr(), - self.buf.as_mut_ptr(), - roll_len, - ); - } - self.end = roll_len; - } -} diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/byte_frequencies.rs libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/byte_frequencies.rs --- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/byte_frequencies.rs 2022-11-23 18:55:06.000000000 +0000 +++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/byte_frequencies.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,258 +0,0 @@ -pub const BYTE_FREQUENCIES: [u8; 256] = [ - 55, // '\x00' - 52, // '\x01' - 51, // '\x02' - 50, // '\x03' - 49, // '\x04' - 48, // '\x05' - 47, // '\x06' - 46, // '\x07' - 45, // '\x08' - 103, // '\t' - 242, // '\n' - 66, // '\x0b' - 67, // '\x0c' - 229, // '\r' - 44, // '\x0e' - 43, // '\x0f' - 42, // '\x10' - 41, // '\x11' - 40, // '\x12' - 39, // '\x13' - 38, // '\x14' - 37, // '\x15' - 36, // '\x16' - 35, // '\x17' - 34, // '\x18' - 33, // '\x19' - 56, // '\x1a' - 32, // '\x1b' - 31, // '\x1c' - 30, // '\x1d' - 29, // '\x1e' - 28, // '\x1f' - 255, // ' ' - 148, // '!' - 164, // '"' - 149, // '#' - 136, // '$' - 160, // '%' - 155, // '&' - 173, // "'" - 221, // '(' - 222, // ')' - 134, // '*' - 122, // '+' - 232, // ',' - 202, // '-' - 215, // '.' - 224, // '/' - 208, // '0' - 220, // '1' - 204, // '2' - 187, // '3' - 183, // '4' - 179, // '5' - 177, // '6' - 168, // '7' - 178, // '8' - 200, // '9' - 226, // ':' - 195, // ';' - 154, // '<' - 184, // '=' - 174, // '>' - 126, // '?' - 120, // '@' - 191, // 'A' - 157, // 'B' - 194, // 'C' - 170, // 'D' - 189, // 'E' - 162, // 'F' - 161, // 'G' - 150, // 'H' - 193, // 'I' - 142, // 'J' - 137, // 'K' - 171, // 'L' - 176, // 'M' - 185, // 'N' - 167, // 'O' - 186, // 'P' - 112, // 'Q' - 175, // 'R' - 192, // 'S' - 188, // 'T' - 156, // 'U' - 140, // 'V' - 143, // 'W' - 123, // 'X' - 133, // 'Y' - 128, // 'Z' - 147, // '[' - 138, // '\\' - 146, // ']' - 114, // '^' - 223, // '_' - 151, // '`' - 249, // 'a' - 216, // 'b' - 238, // 'c' - 236, // 'd' - 253, // 'e' - 227, // 'f' - 218, // 'g' - 230, // 'h' - 247, // 'i' - 135, // 'j' - 180, // 'k' - 241, // 'l' - 233, // 'm' - 246, // 'n' - 244, // 'o' - 231, // 'p' - 139, // 'q' - 245, // 'r' - 243, // 's' - 251, // 't' - 235, // 'u' - 201, // 'v' - 196, // 'w' - 240, // 'x' - 214, // 'y' - 152, // 'z' - 182, // '{' - 205, // '|' - 181, // '}' - 127, // '~' - 27, // '\x7f' - 212, // '\x80' - 211, // '\x81' - 210, // '\x82' - 213, // '\x83' - 228, // '\x84' - 197, // '\x85' - 169, // '\x86' - 159, // '\x87' - 131, // '\x88' - 172, // '\x89' - 105, // '\x8a' - 80, // '\x8b' - 98, // '\x8c' - 96, // '\x8d' - 97, // '\x8e' - 81, // '\x8f' - 207, // '\x90' - 145, // '\x91' - 116, // '\x92' - 115, // '\x93' - 144, // '\x94' - 130, // '\x95' - 153, // '\x96' - 121, // '\x97' - 107, // '\x98' - 132, // '\x99' - 109, // '\x9a' - 110, // '\x9b' - 124, // '\x9c' - 111, // '\x9d' - 82, // '\x9e' - 108, // '\x9f' - 118, // '\xa0' - 141, // '¡' - 113, // '¢' - 129, // '£' - 119, // '¤' - 125, // '¥' - 165, // '¦' - 117, // '§' - 92, // '¨' - 106, // '©' - 83, // 'ª' - 72, // '«' - 99, // '¬' - 93, // '\xad' - 65, // '®' - 79, // '¯' - 166, // '°' - 237, // '±' - 163, // '²' - 199, // '³' - 190, // '´' - 225, // 'µ' - 209, // '¶' - 203, // '·' - 198, // '¸' - 217, // '¹' - 219, // 'º' - 206, // '»' - 234, // '¼' - 248, // '½' - 158, // '¾' - 239, // '¿' - 255, // 'À' - 255, // 'Á' - 255, // 'Â' - 255, // 'Ã' - 255, // 'Ä' - 255, // 'Å' - 255, // 'Æ' 
- 255, // 'Ç' - 255, // 'È' - 255, // 'É' - 255, // 'Ê' - 255, // 'Ë' - 255, // 'Ì' - 255, // 'Í' - 255, // 'Î' - 255, // 'Ï' - 255, // 'Ð' - 255, // 'Ñ' - 255, // 'Ò' - 255, // 'Ó' - 255, // 'Ô' - 255, // 'Õ' - 255, // 'Ö' - 255, // '×' - 255, // 'Ø' - 255, // 'Ù' - 255, // 'Ú' - 255, // 'Û' - 255, // 'Ü' - 255, // 'Ý' - 255, // 'Þ' - 255, // 'ß' - 255, // 'à' - 255, // 'á' - 255, // 'â' - 255, // 'ã' - 255, // 'ä' - 255, // 'å' - 255, // 'æ' - 255, // 'ç' - 255, // 'è' - 255, // 'é' - 255, // 'ê' - 255, // 'ë' - 255, // 'ì' - 255, // 'í' - 255, // 'î' - 255, // 'ï' - 255, // 'ð' - 255, // 'ñ' - 255, // 'ò' - 255, // 'ó' - 255, // 'ô' - 255, // 'õ' - 255, // 'ö' - 255, // '÷' - 255, // 'ø' - 255, // 'ù' - 255, // 'ú' - 255, // 'û' - 255, // 'ü' - 255, // 'ý' - 255, // 'þ' - 255, // 'ÿ' -]; diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/classes.rs libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/classes.rs --- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/classes.rs 2022-11-23 18:55:06.000000000 +0000 +++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/classes.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,238 +0,0 @@ -use std::fmt; - -/// A representation of byte oriented equivalence classes. -/// -/// This is used in an FSM to reduce the size of the transition table. This can -/// have a particularly large impact not only on the total size of an FSM, but -/// also on compile times. -#[derive(Clone, Copy)] -pub struct ByteClasses([u8; 256]); - -impl ByteClasses { - /// Creates a new set of equivalence classes where all bytes are mapped to - /// the same class. - pub fn empty() -> ByteClasses { - ByteClasses([0; 256]) - } - - /// Creates a new set of equivalence classes where each byte belongs to - /// its own equivalence class. - pub fn singletons() -> ByteClasses { - let mut classes = ByteClasses::empty(); - for i in 0..256 { - classes.set(i as u8, i as u8); - } - classes - } - - /// Set the equivalence class for the given byte. - #[inline] - pub fn set(&mut self, byte: u8, class: u8) { - self.0[byte as usize] = class; - } - - /// Get the equivalence class for the given byte. - #[inline] - pub fn get(&self, byte: u8) -> u8 { - // SAFETY: This is safe because all dense transitions have - // exactly 256 elements, so all u8 values are valid indices. - self.0[byte as usize] - } - - /// Return the total number of elements in the alphabet represented by - /// these equivalence classes. Equivalently, this returns the total number - /// of equivalence classes. - #[inline] - pub fn alphabet_len(&self) -> usize { - self.0[255] as usize + 1 - } - - /// Returns true if and only if every byte in this class maps to its own - /// equivalence class. Equivalently, there are 256 equivalence classes - /// and each class contains exactly one byte. - #[inline] - pub fn is_singleton(&self) -> bool { - self.alphabet_len() == 256 - } - - /// Returns an iterator over a sequence of representative bytes from each - /// equivalence class. Namely, this yields exactly N items, where N is - /// equivalent to the number of equivalence classes. Each item is an - /// arbitrary byte drawn from each equivalence class. - /// - /// This is useful when one is determinizing an NFA and the NFA's alphabet - /// hasn't been converted to equivalence classes yet. Picking an arbitrary - /// byte from each equivalence class then permits a full exploration of - /// the NFA instead of using every possible byte value. 
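As a concrete illustration of how few classes a small pattern set induces, a standalone sketch (it mimics the boundary-marking scheme described below for `ByteClassBuilder`, not the crate's internal API): for the literals `bar` and `baz` used in the builder docs earlier, marking each distinct pattern byte as its own class splits the byte space into only eight classes, consistent with the "at least five" claim there:

```
fn main() {
    // Mark class boundaries the way singleton `set_range` calls would
    // for each distinct byte occurring in `bar` and `baz`.
    let mut boundary = [false; 256];
    for &b in b"barz" {
        if b > 0 {
            boundary[b as usize - 1] = true;
        }
        boundary[b as usize] = true;
    }
    // A boundary at byte i (for i < 255) ends an equivalence class.
    let classes = 1 + boundary[..255].iter().filter(|&&set| set).count();
    // `a`, `b`, `r`, `z` each alone, plus the four ranges around them.
    assert_eq!(8, classes);
}
```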
- pub fn representatives(&self) -> ByteClassRepresentatives<'_> { - ByteClassRepresentatives { classes: self, byte: 0, last_class: None } - } - - /// Returns all of the bytes in the given equivalence class. - /// - /// The second element in the tuple indicates the number of elements in - /// the array. - fn elements(&self, equiv: u8) -> ([u8; 256], usize) { - let (mut array, mut len) = ([0; 256], 0); - for b in 0..256 { - if self.get(b as u8) == equiv { - array[len] = b as u8; - len += 1; - } - } - (array, len) - } -} - -impl fmt::Debug for ByteClasses { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - if self.is_singleton() { - write!(f, "ByteClasses({{singletons}})") - } else { - write!(f, "ByteClasses(")?; - for equiv in 0..self.alphabet_len() { - let (members, len) = self.elements(equiv as u8); - write!(f, " {} => {:?}", equiv, &members[..len])?; - } - write!(f, ")") - } - } -} - -/// An iterator over representative bytes from each equivalence class. -#[derive(Debug)] -pub struct ByteClassRepresentatives<'a> { - classes: &'a ByteClasses, - byte: usize, - last_class: Option<u8>, -} - -impl<'a> Iterator for ByteClassRepresentatives<'a> { - type Item = u8; - - fn next(&mut self) -> Option<u8> { - while self.byte < 256 { - let byte = self.byte as u8; - let class = self.classes.get(byte); - self.byte += 1; - - if self.last_class != Some(class) { - self.last_class = Some(class); - return Some(byte); - } - } - None - } -} - -/// A byte class builder keeps track of an *approximation* of equivalence -/// classes of bytes during NFA construction. That is, every byte in an -/// equivalence class cannot discriminate between a match and a non-match. -/// -/// For example, in the literals `abc` and `xyz`, the bytes [\x00-`], [d-w] -/// and [{-\xFF] never discriminate between a match and a non-match, precisely -/// because they never occur in the literals anywhere. -/// -/// Note though that this does not necessarily compute the minimal set of -/// equivalence classes. For example, in the literals above, the byte ranges -/// [\x00-`], [d-w] and [{-\xFF] are all treated as distinct equivalence -/// classes even though they could be treated as a single class. The reason for -/// this is implementation complexity. In the future, we should endeavor to -/// compute the minimal equivalence classes since they can have a rather large -/// impact on the size of the DFA. -/// -/// The representation here is 256 booleans, all initially set to false. Each -/// boolean maps to its corresponding byte based on position. A `true` value -/// indicates the end of an equivalence class, where its corresponding byte -/// and all of the bytes corresponding to all previous contiguous `false` -/// values are in the same equivalence class. -/// -/// This particular representation only permits contiguous ranges of bytes to -/// be in the same equivalence class, which means that we can never discover -/// the true minimal set of equivalence classes. -#[derive(Debug)] -pub struct ByteClassBuilder(Vec<bool>); - -impl ByteClassBuilder { - /// Create a new builder of byte classes where all bytes are part of the - /// same equivalence class. - pub fn new() -> ByteClassBuilder { - ByteClassBuilder(vec![false; 256]) - } - - /// Indicate that the range of bytes given (inclusive) can discriminate a - /// match between it and all other bytes outside of the range.
- pub fn set_range(&mut self, start: u8, end: u8) { - debug_assert!(start <= end); - if start > 0 { - self.0[start as usize - 1] = true; - } - self.0[end as usize] = true; - } - - /// Build byte classes that map all byte values to their corresponding - /// equivalence class. The last mapping indicates the largest equivalence - /// class identifier (which is never bigger than 255). - pub fn build(&self) -> ByteClasses { - let mut classes = ByteClasses::empty(); - let mut class = 0u8; - let mut i = 0; - loop { - classes.set(i as u8, class as u8); - if i >= 255 { - break; - } - if self.0[i] { - class = class.checked_add(1).unwrap(); - } - i += 1; - } - classes - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn byte_classes() { - let mut set = ByteClassBuilder::new(); - set.set_range(b'a', b'z'); - - let classes = set.build(); - assert_eq!(classes.get(0), 0); - assert_eq!(classes.get(1), 0); - assert_eq!(classes.get(2), 0); - assert_eq!(classes.get(b'a' - 1), 0); - assert_eq!(classes.get(b'a'), 1); - assert_eq!(classes.get(b'm'), 1); - assert_eq!(classes.get(b'z'), 1); - assert_eq!(classes.get(b'z' + 1), 2); - assert_eq!(classes.get(254), 2); - assert_eq!(classes.get(255), 2); - - let mut set = ByteClassBuilder::new(); - set.set_range(0, 2); - set.set_range(4, 6); - let classes = set.build(); - assert_eq!(classes.get(0), 0); - assert_eq!(classes.get(1), 0); - assert_eq!(classes.get(2), 0); - assert_eq!(classes.get(3), 1); - assert_eq!(classes.get(4), 2); - assert_eq!(classes.get(5), 2); - assert_eq!(classes.get(6), 2); - assert_eq!(classes.get(7), 3); - assert_eq!(classes.get(255), 3); - } - - #[test] - fn full_byte_classes() { - let mut set = ByteClassBuilder::new(); - for i in 0..256u16 { - set.set_range(i as u8, i as u8); - } - assert_eq!(set.build().alphabet_len(), 256); - } -} diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/dfa.rs libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/dfa.rs --- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/dfa.rs 2022-11-23 18:55:06.000000000 +0000 +++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/dfa.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,713 +0,0 @@ -use std::mem::size_of; - -use crate::ahocorasick::MatchKind; -use crate::automaton::Automaton; -use crate::classes::ByteClasses; -use crate::error::Result; -use crate::nfa::{PatternID, PatternLength, NFA}; -use crate::prefilter::{Prefilter, PrefilterObj, PrefilterState}; -use crate::state_id::{dead_id, fail_id, premultiply_overflow_error, StateID}; -use crate::Match; - -#[derive(Clone, Debug)] -pub enum DFA<S> { - Standard(Standard<S>), - ByteClass(ByteClass<S>), - Premultiplied(Premultiplied<S>), - PremultipliedByteClass(PremultipliedByteClass<S>), -} - -impl<S: StateID> DFA<S> { - fn repr(&self) -> &Repr<S> { - match *self { - DFA::Standard(ref dfa) => dfa.repr(), - DFA::ByteClass(ref dfa) => dfa.repr(), - DFA::Premultiplied(ref dfa) => dfa.repr(), - DFA::PremultipliedByteClass(ref dfa) => dfa.repr(), - } - } - - pub fn match_kind(&self) -> &MatchKind { - &self.repr().match_kind - } - - pub fn heap_bytes(&self) -> usize { - self.repr().heap_bytes - } - - pub fn max_pattern_len(&self) -> usize { - self.repr().max_pattern_len - } - - pub fn pattern_count(&self) -> usize { - self.repr().pattern_count - } - - pub fn prefilter(&self) -> Option<&dyn Prefilter> { - self.repr().prefilter.as_ref().map(|p| p.as_ref()) - } - - pub fn start_state(&self) -> S { - self.repr().start_id - } - - #[inline(always)] - pub fn overlapping_find_at( - &self, -
prestate: &mut PrefilterState, - haystack: &[u8], - at: usize, - state_id: &mut S, - match_index: &mut usize, - ) -> Option<Match> { - match *self { - DFA::Standard(ref dfa) => dfa.overlapping_find_at( - prestate, - haystack, - at, - state_id, - match_index, - ), - DFA::ByteClass(ref dfa) => dfa.overlapping_find_at( - prestate, - haystack, - at, - state_id, - match_index, - ), - DFA::Premultiplied(ref dfa) => dfa.overlapping_find_at( - prestate, - haystack, - at, - state_id, - match_index, - ), - DFA::PremultipliedByteClass(ref dfa) => dfa.overlapping_find_at( - prestate, - haystack, - at, - state_id, - match_index, - ), - } - } - - #[inline(always)] - pub fn earliest_find_at( - &self, - prestate: &mut PrefilterState, - haystack: &[u8], - at: usize, - state_id: &mut S, - ) -> Option<Match> { - match *self { - DFA::Standard(ref dfa) => { - dfa.earliest_find_at(prestate, haystack, at, state_id) - } - DFA::ByteClass(ref dfa) => { - dfa.earliest_find_at(prestate, haystack, at, state_id) - } - DFA::Premultiplied(ref dfa) => { - dfa.earliest_find_at(prestate, haystack, at, state_id) - } - DFA::PremultipliedByteClass(ref dfa) => { - dfa.earliest_find_at(prestate, haystack, at, state_id) - } - } - } - - #[inline(always)] - pub fn find_at_no_state( - &self, - prestate: &mut PrefilterState, - haystack: &[u8], - at: usize, - ) -> Option<Match> { - match *self { - DFA::Standard(ref dfa) => { - dfa.find_at_no_state(prestate, haystack, at) - } - DFA::ByteClass(ref dfa) => { - dfa.find_at_no_state(prestate, haystack, at) - } - DFA::Premultiplied(ref dfa) => { - dfa.find_at_no_state(prestate, haystack, at) - } - DFA::PremultipliedByteClass(ref dfa) => { - dfa.find_at_no_state(prestate, haystack, at) - } - } - } -} - -#[derive(Clone, Debug)] -pub struct Standard<S>(Repr<S>); - -impl<S: StateID> Standard<S> { - fn repr(&self) -> &Repr<S> { - &self.0 - } -} - -impl<S: StateID> Automaton for Standard<S> { - type ID = S; - - fn match_kind(&self) -> &MatchKind { - &self.repr().match_kind - } - - fn anchored(&self) -> bool { - self.repr().anchored - } - - fn prefilter(&self) -> Option<&dyn Prefilter> { - self.repr().prefilter.as_ref().map(|p| p.as_ref()) - } - - fn start_state(&self) -> S { - self.repr().start_id - } - - fn is_valid(&self, id: S) -> bool { - id.to_usize() < self.repr().state_count - } - - fn is_match_state(&self, id: S) -> bool { - self.repr().is_match_state(id) - } - - fn is_match_or_dead_state(&self, id: S) -> bool { - self.repr().is_match_or_dead_state(id) - } - - fn get_match( - &self, - id: S, - match_index: usize, - end: usize, - ) -> Option<Match> { - self.repr().get_match(id, match_index, end) - } - - fn match_count(&self, id: S) -> usize { - self.repr().match_count(id) - } - - fn next_state(&self, current: S, input: u8) -> S { - let o = current.to_usize() * 256 + input as usize; - self.repr().trans[o] - } -} - -#[derive(Clone, Debug)] -pub struct ByteClass<S>(Repr<S>); - -impl<S: StateID> ByteClass<S> { - fn repr(&self) -> &Repr<S> { - &self.0 - } -} - -impl<S: StateID> Automaton for ByteClass<S> { - type ID = S; - - fn match_kind(&self) -> &MatchKind { - &self.repr().match_kind - } - - fn anchored(&self) -> bool { - self.repr().anchored - } - - fn prefilter(&self) -> Option<&dyn Prefilter> { - self.repr().prefilter.as_ref().map(|p| p.as_ref()) - } - - fn start_state(&self) -> S { - self.repr().start_id - } - - fn is_valid(&self, id: S) -> bool { - id.to_usize() < self.repr().state_count - } - - fn is_match_state(&self, id: S) -> bool { - self.repr().is_match_state(id) - } - - fn is_match_or_dead_state(&self, id: S) -> bool { - self.repr().is_match_or_dead_state(id) - } - - fn get_match( -
&self, - id: S, - match_index: usize, - end: usize, - ) -> Option { - self.repr().get_match(id, match_index, end) - } - - fn match_count(&self, id: S) -> usize { - self.repr().match_count(id) - } - - fn next_state(&self, current: S, input: u8) -> S { - let alphabet_len = self.repr().byte_classes.alphabet_len(); - let input = self.repr().byte_classes.get(input); - let o = current.to_usize() * alphabet_len + input as usize; - self.repr().trans[o] - } -} - -#[derive(Clone, Debug)] -pub struct Premultiplied(Repr); - -impl Premultiplied { - fn repr(&self) -> &Repr { - &self.0 - } -} - -impl Automaton for Premultiplied { - type ID = S; - - fn match_kind(&self) -> &MatchKind { - &self.repr().match_kind - } - - fn anchored(&self) -> bool { - self.repr().anchored - } - - fn prefilter(&self) -> Option<&dyn Prefilter> { - self.repr().prefilter.as_ref().map(|p| p.as_ref()) - } - - fn start_state(&self) -> S { - self.repr().start_id - } - - fn is_valid(&self, id: S) -> bool { - (id.to_usize() / 256) < self.repr().state_count - } - - fn is_match_state(&self, id: S) -> bool { - self.repr().is_match_state(id) - } - - fn is_match_or_dead_state(&self, id: S) -> bool { - self.repr().is_match_or_dead_state(id) - } - - fn get_match( - &self, - id: S, - match_index: usize, - end: usize, - ) -> Option { - if id > self.repr().max_match { - return None; - } - self.repr() - .matches - .get(id.to_usize() / 256) - .and_then(|m| m.get(match_index)) - .map(|&(id, len)| Match { pattern: id, len, end }) - } - - fn match_count(&self, id: S) -> usize { - let o = id.to_usize() / 256; - self.repr().matches[o].len() - } - - fn next_state(&self, current: S, input: u8) -> S { - let o = current.to_usize() + input as usize; - self.repr().trans[o] - } -} - -#[derive(Clone, Debug)] -pub struct PremultipliedByteClass(Repr); - -impl PremultipliedByteClass { - fn repr(&self) -> &Repr { - &self.0 - } -} - -impl Automaton for PremultipliedByteClass { - type ID = S; - - fn match_kind(&self) -> &MatchKind { - &self.repr().match_kind - } - - fn anchored(&self) -> bool { - self.repr().anchored - } - - fn prefilter(&self) -> Option<&dyn Prefilter> { - self.repr().prefilter.as_ref().map(|p| p.as_ref()) - } - - fn start_state(&self) -> S { - self.repr().start_id - } - - fn is_valid(&self, id: S) -> bool { - (id.to_usize() / self.repr().alphabet_len()) < self.repr().state_count - } - - fn is_match_state(&self, id: S) -> bool { - self.repr().is_match_state(id) - } - - fn is_match_or_dead_state(&self, id: S) -> bool { - self.repr().is_match_or_dead_state(id) - } - - fn get_match( - &self, - id: S, - match_index: usize, - end: usize, - ) -> Option { - if id > self.repr().max_match { - return None; - } - self.repr() - .matches - .get(id.to_usize() / self.repr().alphabet_len()) - .and_then(|m| m.get(match_index)) - .map(|&(id, len)| Match { pattern: id, len, end }) - } - - fn match_count(&self, id: S) -> usize { - let o = id.to_usize() / self.repr().alphabet_len(); - self.repr().matches[o].len() - } - - fn next_state(&self, current: S, input: u8) -> S { - let input = self.repr().byte_classes.get(input); - let o = current.to_usize() + input as usize; - self.repr().trans[o] - } -} - -#[derive(Clone, Debug)] -pub struct Repr { - match_kind: MatchKind, - anchored: bool, - premultiplied: bool, - start_id: S, - /// The length, in bytes, of the longest pattern in this automaton. This - /// information is useful for keeping correct buffer sizes when searching - /// on streams. 
- max_pattern_len: usize, - /// The total number of patterns added to this automaton. This includes - /// patterns that may never match. - pattern_count: usize, - state_count: usize, - max_match: S, - /// The number of bytes of heap used by this DFA's transition table. - heap_bytes: usize, - /// A prefilter for quickly detecting candidate matches, if pertinent. - prefilter: Option<PrefilterObj>, - byte_classes: ByteClasses, - trans: Vec<S>, - matches: Vec<Vec<(PatternID, PatternLength)>>, -} - -impl<S: StateID> Repr<S> { - /// Returns the total alphabet size for this DFA. - /// - /// If byte classes are enabled, then this corresponds to the number of - /// equivalence classes. If they are disabled, then this is always 256. - fn alphabet_len(&self) -> usize { - self.byte_classes.alphabet_len() - } - - /// Returns true only if the given state is a match state. - fn is_match_state(&self, id: S) -> bool { - id <= self.max_match && id > dead_id() - } - - /// Returns true only if the given state is either a dead state or a match - /// state. - fn is_match_or_dead_state(&self, id: S) -> bool { - id <= self.max_match - } - - /// Get the ith match for the given state, where the end position of a - /// match was found at `end`. - /// - /// # Panics - /// - /// The caller must ensure that the given state identifier is valid, - /// otherwise this may panic. The `match_index` need not be valid. That is, - /// if the given state has no matches then this returns `None`. - fn get_match( - &self, - id: S, - match_index: usize, - end: usize, - ) -> Option<Match> { - if id > self.max_match { - return None; - } - self.matches - .get(id.to_usize()) - .and_then(|m| m.get(match_index)) - .map(|&(id, len)| Match { pattern: id, len, end }) - } - - /// Return the total number of matches for the given state. - /// - /// # Panics - /// - /// The caller must ensure that the given identifier is valid, or else - /// this panics. - fn match_count(&self, id: S) -> usize { - self.matches[id.to_usize()].len() - } - - /// Get the next state given `from` as the current state and `byte` as the - /// current input byte. - fn next_state(&self, from: S, byte: u8) -> S { - let alphabet_len = self.alphabet_len(); - let byte = self.byte_classes.get(byte); - self.trans[from.to_usize() * alphabet_len + byte as usize] - } - - /// Set the `byte` transition for the `from` state to point to `to`. - fn set_next_state(&mut self, from: S, byte: u8, to: S) { - let alphabet_len = self.alphabet_len(); - let byte = self.byte_classes.get(byte); - self.trans[from.to_usize() * alphabet_len + byte as usize] = to; - } - - /// Swap the given states in place. - fn swap_states(&mut self, id1: S, id2: S) { - assert!(!self.premultiplied, "can't swap states in premultiplied DFA"); - - let o1 = id1.to_usize() * self.alphabet_len(); - let o2 = id2.to_usize() * self.alphabet_len(); - for b in 0..self.alphabet_len() { - self.trans.swap(o1 + b, o2 + b); - } - self.matches.swap(id1.to_usize(), id2.to_usize()); - } - - /// This routine shuffles all match states in this DFA to the beginning - /// of the DFA such that every non-match state appears after every match - /// state. (With one exception: the special fail and dead states remain as - /// the first two states.) - /// - /// The purpose of doing this shuffling is to avoid an extra conditional - /// in the search loop, and in particular, detecting whether a state is a - /// match or not does not need to access any memory. - /// - /// This updates `self.max_match` to point to the last matching state as - /// well as `self.start` if the starting state was moved.
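A minimal sketch of the invariant this shuffle establishes, with made-up state IDs rather than the crate's internals: once match states are packed at the front, both match predicates above reduce to bare integer comparisons.

```rust
// Sketch only: 0 and 1 stand in for fail_id() and dead_id(); after the
// shuffle, states 2..=max_match are exactly the match states.
fn main() {
    let dead_id = 1usize;
    let max_match = 3usize;
    // Match tests are pure comparisons; no lookup table is touched.
    let is_match_state = |id: usize| id <= max_match && id > dead_id;
    let is_match_or_dead_state = |id: usize| id <= max_match;
    assert!(is_match_state(2) && is_match_state(3));
    assert!(!is_match_state(dead_id) && !is_match_state(4));
    assert!(is_match_or_dead_state(dead_id));
}
```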
- fn shuffle_match_states(&mut self) { - assert!( - !self.premultiplied, - "cannot shuffle match states of premultiplied DFA" - ); - - if self.state_count <= 1 { - return; - } - - let mut first_non_match = self.start_id.to_usize(); - while first_non_match < self.state_count - && self.matches[first_non_match].len() > 0 - { - first_non_match += 1; - } - - let mut swaps: Vec<S> = vec![fail_id(); self.state_count]; - let mut cur = self.state_count - 1; - while cur > first_non_match { - if self.matches[cur].len() > 0 { - self.swap_states( - S::from_usize(cur), - S::from_usize(first_non_match), - ); - swaps[cur] = S::from_usize(first_non_match); - swaps[first_non_match] = S::from_usize(cur); - - first_non_match += 1; - while first_non_match < cur - && self.matches[first_non_match].len() > 0 - { - first_non_match += 1; - } - } - cur -= 1; - } - for id in (0..self.state_count).map(S::from_usize) { - let alphabet_len = self.alphabet_len(); - let offset = id.to_usize() * alphabet_len; - for next in &mut self.trans[offset..offset + alphabet_len] { - if swaps[next.to_usize()] != fail_id() { - *next = swaps[next.to_usize()]; - } - } - } - if swaps[self.start_id.to_usize()] != fail_id() { - self.start_id = swaps[self.start_id.to_usize()]; - } - self.max_match = S::from_usize(first_non_match - 1); - } - - fn premultiply(&mut self) -> Result<()> { - if self.premultiplied || self.state_count <= 1 { - return Ok(()); - } - - let alpha_len = self.alphabet_len(); - premultiply_overflow_error( - S::from_usize(self.state_count - 1), - alpha_len, - )?; - - for id in (2..self.state_count).map(S::from_usize) { - let offset = id.to_usize() * alpha_len; - for next in &mut self.trans[offset..offset + alpha_len] { - if *next == dead_id() { - continue; - } - *next = S::from_usize(next.to_usize() * alpha_len); - } - } - self.premultiplied = true; - self.start_id = S::from_usize(self.start_id.to_usize() * alpha_len); - self.max_match = S::from_usize(self.max_match.to_usize() * alpha_len); - Ok(()) - } - - /// Computes the total amount of heap used by this DFA in bytes. - fn calculate_size(&mut self) { - let mut size = (self.trans.len() * size_of::<S>()) - + (self.matches.len() - * size_of::<Vec<(PatternID, PatternLength)>>()); - for state_matches in &self.matches { - size += - state_matches.len() * size_of::<(PatternID, PatternLength)>(); - } - size += self.prefilter.as_ref().map_or(0, |p| p.as_ref().heap_bytes()); - self.heap_bytes = size; - } -} - -/// A builder for configuring the determinization of an NFA into a DFA. -#[derive(Clone, Debug)] -pub struct Builder { - premultiply: bool, - byte_classes: bool, -} - -impl Builder { - /// Create a new builder for a DFA. - pub fn new() -> Builder { - Builder { premultiply: true, byte_classes: true } - } - - /// Build a DFA from the given NFA. - /// - /// This returns an error if the state identifiers exceed their - /// representation size. This can only happen when state ids are - /// premultiplied (which is enabled by default).
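A rough sketch of the premultiplication arithmetic, with an illustrative table and hypothetical numbers only: scaling every stored state ID by the alphabet size lets the search loop replace `state * alphabet_len + class` with a single add.

```rust
fn main() {
    // Transition table for three states over a 4-symbol alphabet. Entries
    // are premultiplied state IDs: state 1 is stored as 1 * 4 = 4, state 2
    // as 2 * 4 = 8, and so on.
    let trans: Vec<usize> = vec![
        0, 4, 8, 0, // state 0 (stored ID 0)
        8, 8, 0, 4, // state 1 (stored ID 4)
        4, 0, 0, 8, // state 2 (stored ID 8)
    ];
    let state = 4usize; // premultiplied ID of state 1
    let class = 3usize; // equivalence class of the input byte
    // One add and one index; no multiplication in the hot loop.
    assert_eq!(trans[state + class], 4);
}
```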
- pub fn build<S: StateID>(&self, nfa: &NFA<S>) -> Result<DFA<S>> { - let byte_classes = if self.byte_classes { - nfa.byte_classes().clone() - } else { - ByteClasses::singletons() - }; - let alphabet_len = byte_classes.alphabet_len(); - let trans = vec![fail_id(); alphabet_len * nfa.state_len()]; - let matches = vec![vec![]; nfa.state_len()]; - let mut repr = Repr { - match_kind: nfa.match_kind().clone(), - anchored: nfa.anchored(), - premultiplied: false, - start_id: nfa.start_state(), - max_pattern_len: nfa.max_pattern_len(), - pattern_count: nfa.pattern_count(), - state_count: nfa.state_len(), - max_match: fail_id(), - heap_bytes: 0, - prefilter: nfa.prefilter_obj().map(|p| p.clone()), - byte_classes: byte_classes.clone(), - trans, - matches, - }; - for id in (0..nfa.state_len()).map(S::from_usize) { - repr.matches[id.to_usize()].extend_from_slice(nfa.matches(id)); - - let fail = nfa.failure_transition(id); - nfa.iter_all_transitions(&byte_classes, id, |b, mut next| { - if next == fail_id() { - next = nfa_next_state_memoized(nfa, &repr, id, fail, b); - } - repr.set_next_state(id, b, next); - }); - } - repr.shuffle_match_states(); - repr.calculate_size(); - if self.premultiply { - repr.premultiply()?; - if byte_classes.is_singleton() { - Ok(DFA::Premultiplied(Premultiplied(repr))) - } else { - Ok(DFA::PremultipliedByteClass(PremultipliedByteClass(repr))) - } - } else { - if byte_classes.is_singleton() { - Ok(DFA::Standard(Standard(repr))) - } else { - Ok(DFA::ByteClass(ByteClass(repr))) - } - } - } - - /// Whether or not to use byte classes in the DFA. - pub fn byte_classes(&mut self, yes: bool) -> &mut Builder { - self.byte_classes = yes; - self - } - - /// Whether to premultiply state identifiers in the DFA. - pub fn premultiply(&mut self, yes: bool) -> &mut Builder { - self.premultiply = yes; - self - } -} - -/// This returns the next NFA transition (including resolving failure -/// transitions), except once it sees a state id less than the id of the DFA -/// state that is currently being populated, then we no longer need to follow -/// failure transitions and can instead query the pre-computed state id from -/// the DFA itself. -/// -/// In general, this should only be called when a failure transition is seen. -fn nfa_next_state_memoized<S: StateID>( - nfa: &NFA<S>, - dfa: &Repr<S>, - populating: S, - mut current: S, - input: u8, -) -> S { - loop { - if current < populating { - return dfa.next_state(current, input); - } - let next = nfa.next_state(current, input); - if next != fail_id() { - return next; - } - current = nfa.failure_transition(current); - } -} diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/error.rs libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/error.rs --- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/error.rs 2022-11-23 18:55:06.000000000 +0000 +++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/error.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,101 +0,0 @@ -use std::error; -use std::fmt; -use std::result; - -pub type Result<T> = result::Result<T, Error>; - -/// An error that occurred during the construction of an Aho-Corasick -/// automaton. -#[derive(Clone, Debug)] -pub struct Error { - kind: ErrorKind, -} - -/// The kind of error that occurred. -#[derive(Clone, Debug)] -pub enum ErrorKind { - /// An error that occurs when constructing an automaton would require the - /// use of a state ID that overflows the chosen state ID representation.
- /// For example, if one is using `u8` for state IDs and builds a DFA with - 257 states, then the last state's ID will be `256` which cannot be - represented with `u8`. - StateIDOverflow { - /// The maximum possible state ID. - max: usize, - }, - /// An error that occurs when premultiplication of state IDs is requested - /// when constructing an Aho-Corasick DFA, but doing so would overflow the - /// chosen state ID representation. - /// - /// When `max == requested_max`, then the state ID would overflow `usize`. - PremultiplyOverflow { - /// The maximum possible state id. - max: usize, - /// The maximum ID required by premultiplication. - requested_max: usize, - }, -} - -impl Error { - /// Return the kind of this error. - pub fn kind(&self) -> &ErrorKind { - &self.kind - } - - pub(crate) fn state_id_overflow(max: usize) -> Error { - Error { kind: ErrorKind::StateIDOverflow { max } } - } - - pub(crate) fn premultiply_overflow( - max: usize, - requested_max: usize, - ) -> Error { - Error { kind: ErrorKind::PremultiplyOverflow { max, requested_max } } - } -} - -impl error::Error for Error { - fn description(&self) -> &str { - match self.kind { - ErrorKind::StateIDOverflow { .. } => { - "state id representation too small" - } - ErrorKind::PremultiplyOverflow { .. } => { - "state id representation too small for premultiplication" - } - } - } -} - -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self.kind { - ErrorKind::StateIDOverflow { max } => write!( - f, - "building the automaton failed because it required \ - building more states than can be identified, where the \ - maximum ID for the chosen representation is {}", - max, - ), - ErrorKind::PremultiplyOverflow { max, requested_max } => { - if max == requested_max { - write!( - f, - "premultiplication of states requires the ability to \ - represent a state ID greater than what can fit on \ - this platform's usize, which is {}", - ::std::usize::MAX, - ) - } else { - write!( - f, - "premultiplication of states requires the ability to \ - represent at least a state ID of {}, but the chosen \ - representation only permits a maximum state ID of {}", - requested_max, max, - ) - } - } - } - } -} diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/lib.rs libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/lib.rs --- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/lib.rs 2022-11-23 18:55:06.000000000 +0000 +++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/lib.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,303 +0,0 @@ -/*! -A library for finding occurrences of many patterns at once. This library -provides multiple pattern search principally through an implementation of the -[Aho-Corasick algorithm](https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm), -which builds a fast finite state machine for executing searches in linear time. - -Additionally, this library provides a number of configuration options for -building the automaton that permit controlling the space versus time -trade-off. Other features include simple ASCII case insensitive matching, finding -overlapping matches, replacements, searching streams and even searching and -replacing text in streams.
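As a sketch of how those space/time options surface in the crate's public API, assuming the 0.7-era builder methods `dfa`, `byte_classes` and `premultiply` (their defaults spelled out explicitly here for illustration):

```rust
use aho_corasick::AhoCorasickBuilder;

fn main() {
    // Compile all the way to a DFA (more memory, faster searches), keeping
    // byte classes and premultiplied state IDs enabled.
    let ac = AhoCorasickBuilder::new()
        .dfa(true)
        .byte_classes(true)
        .premultiply(true)
        .build(&["apple", "maple"]);
    assert!(ac.is_match("a maple tree"));
}
```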
- -Finally, unlike all other (known) Aho-Corasick implementations, this one -supports enabling -[leftmost-first](enum.MatchKind.html#variant.LeftmostFirst) -or -[leftmost-longest](enum.MatchKind.html#variant.LeftmostLongest) -match semantics, using a (seemingly) novel alternative construction algorithm. -For more details on what match semantics means, see the -[`MatchKind`](enum.MatchKind.html) -type. - -# Overview - -This section gives a brief overview of the primary types in this crate: - -* [`AhoCorasick`](struct.AhoCorasick.html) is the primary type and represents - an Aho-Corasick automaton. This is the type you use to execute searches. -* [`AhoCorasickBuilder`](struct.AhoCorasickBuilder.html) can be used to build - an Aho-Corasick automaton, and supports configuring a number of options. -* [`Match`](struct.Match.html) represents a single match reported by an - Aho-Corasick automaton. Each match has two pieces of information: the pattern - that matched and the start and end byte offsets corresponding to the position - in the haystack at which it matched. - -Additionally, the [`packed`](packed/index.html) sub-module contains a lower -level API for using fast vectorized routines for finding a small number of -patterns in a haystack. - -# Example: basic searching - -This example shows how to search for occurrences of multiple patterns -simultaneously. Each match includes the pattern that matched along with the -byte offsets of the match. - -``` -use aho_corasick::AhoCorasick; - -let patterns = &["apple", "maple", "Snapple"]; -let haystack = "Nobody likes maple in their apple flavored Snapple."; - -let ac = AhoCorasick::new(patterns); -let mut matches = vec![]; -for mat in ac.find_iter(haystack) { - matches.push((mat.pattern(), mat.start(), mat.end())); -} -assert_eq!(matches, vec![ - (1, 13, 18), - (0, 28, 33), - (2, 43, 50), -]); -``` - -# Example: case insensitivity - -This is like the previous example, but matches `Snapple` case insensitively -using `AhoCorasickBuilder`: - -``` -use aho_corasick::AhoCorasickBuilder; - -let patterns = &["apple", "maple", "snapple"]; -let haystack = "Nobody likes maple in their apple flavored Snapple."; - -let ac = AhoCorasickBuilder::new() - .ascii_case_insensitive(true) - .build(patterns); -let mut matches = vec![]; -for mat in ac.find_iter(haystack) { - matches.push((mat.pattern(), mat.start(), mat.end())); -} -assert_eq!(matches, vec![ - (1, 13, 18), - (0, 28, 33), - (2, 43, 50), -]); -``` - -# Example: replacing matches in a stream - -This example shows how to execute a search and replace on a stream without -loading the entire stream into memory first. - -``` -use aho_corasick::AhoCorasick; - -# fn example() -> Result<(), ::std::io::Error> { -let patterns = &["fox", "brown", "quick"]; -let replace_with = &["sloth", "grey", "slow"]; - -// In a real example, these might be `std::fs::File`s instead. All you need to -// do is supply a pair of `std::io::Read` and `std::io::Write` implementations. -let rdr = "The quick brown fox."; -let mut wtr = vec![]; - -let ac = AhoCorasick::new(patterns); -ac.stream_replace_all(rdr.as_bytes(), &mut wtr, replace_with)?; -assert_eq!(b"The slow grey sloth.".to_vec(), wtr); -# Ok(()) }; example().unwrap() -``` - -# Example: finding the leftmost first match - -In the textbook description of Aho-Corasick, its formulation is typically -structured such that it reports all possible matches, even when they overlap -with another.
In many cases, overlapping matches may not be desired, such as -the case of finding all successive non-overlapping matches like you might with -a standard regular expression. - -Unfortunately the "obvious" way to modify the Aho-Corasick algorithm to do -this doesn't always work in the expected way, since it will report matches as -soon as they are seen. For example, consider matching the regex `Samwise|Sam` -against the text `Samwise`. Most regex engines (that are Perl-like, or -non-POSIX) will report `Samwise` as a match, but the standard Aho-Corasick -algorithm modified for reporting non-overlapping matches will report `Sam`. - -A novel contribution of this library is the ability to change the match -semantics of Aho-Corasick (without additional search time overhead) such that -`Samwise` is reported instead. For example, here's the standard approach: - -``` -use aho_corasick::AhoCorasick; - -let patterns = &["Samwise", "Sam"]; -let haystack = "Samwise"; - -let ac = AhoCorasick::new(patterns); -let mat = ac.find(haystack).expect("should have a match"); -assert_eq!("Sam", &haystack[mat.start()..mat.end()]); -``` - -And now here's the leftmost-first version, which matches how a Perl-like -regex will work: - -``` -use aho_corasick::{AhoCorasickBuilder, MatchKind}; - -let patterns = &["Samwise", "Sam"]; -let haystack = "Samwise"; - -let ac = AhoCorasickBuilder::new() - .match_kind(MatchKind::LeftmostFirst) - .build(patterns); -let mat = ac.find(haystack).expect("should have a match"); -assert_eq!("Samwise", &haystack[mat.start()..mat.end()]); -``` - -In addition to leftmost-first semantics, this library also supports -leftmost-longest semantics, which match the POSIX behavior of a regular -expression alternation. See -[`MatchKind`](enum.MatchKind.html) -for more details. - -# Prefilters - -While an Aho-Corasick automaton can perform admirably when compared to more -naive solutions, it is generally slower than more specialized algorithms that -are accelerated using vector instructions such as SIMD. - -For that reason, this library will internally use a "prefilter" to attempt -to accelerate searches when possible. Currently, this library has several -different algorithms it might use depending on the patterns provided. Once the -number of patterns gets too big, prefilters are no longer used. - -While a prefilter is generally good to have on by default since it works -well in the common case, it can lead to less predictable or even sub-optimal -performance in some cases. For that reason, prefilters can be explicitly -disabled via -[`AhoCorasickBuilder::prefilter`](struct.AhoCorasickBuilder.html#method.prefilter). -*/ - -#![deny(missing_docs)] - -// We can never be truly no_std, but we could be alloc-only some day, so -// require the std feature for now. -#[cfg(not(feature = "std"))] -compile_error!("`std` feature is currently required to build this crate"); - -// #[cfg(doctest)] -// #[macro_use] -// extern crate doc_comment; - -// #[cfg(doctest)] -// doctest!("../README.md"); - -pub use crate::ahocorasick::{ - AhoCorasick, AhoCorasickBuilder, FindIter, FindOverlappingIter, MatchKind, - StreamFindIter, -}; -pub use crate::error::{Error, ErrorKind}; -pub use crate::state_id::StateID; - -mod ahocorasick; -mod automaton; -mod buffer; -mod byte_frequencies; -mod classes; -mod dfa; -mod error; -mod nfa; -pub mod packed; -mod prefilter; -mod state_id; -#[cfg(test)] -mod tests; - -/// A representation of a match reported by an Aho-Corasick automaton. 
-/// -/// A match has two essential pieces of information: the identifier of the -/// pattern that matched, along with the start and end offsets of the match -/// in the haystack. -/// -/// # Examples -/// -/// Basic usage: -/// -/// ``` -/// use aho_corasick::AhoCorasick; -/// -/// let ac = AhoCorasick::new(&[ -/// "foo", "bar", "baz", -/// ]); -/// let mat = ac.find("xxx bar xxx").expect("should have a match"); -/// assert_eq!(1, mat.pattern()); -/// assert_eq!(4, mat.start()); -/// assert_eq!(7, mat.end()); -/// ``` -#[derive(Clone, Debug, Eq, Hash, PartialEq)] -pub struct Match { - /// The pattern id. - pattern: usize, - /// The length of this match, such that the starting position of the match - /// is `end - len`. - /// - /// We use length here because, other than the pattern id, the only - /// information about each pattern that the automaton stores is its length. - /// So using the length here is just a bit more natural. But it isn't - /// technically required. - len: usize, - /// The end offset of the match, exclusive. - end: usize, -} - -impl Match { - /// Returns the identifier of the pattern that matched. - /// - /// The identifier of a pattern is derived from the position in which it - /// was originally inserted into the corresponding automaton. The first - /// pattern has identifier `0`, and each subsequent pattern is `1`, `2` - /// and so on. - #[inline] - pub fn pattern(&self) -> usize { - self.pattern - } - - /// The starting position of the match. - #[inline] - pub fn start(&self) -> usize { - self.end - self.len - } - - /// The ending position of the match. - #[inline] - pub fn end(&self) -> usize { - self.end - } - - /// The length, in bytes, of the match. - #[inline] - pub fn len(&self) -> usize { - self.len - } - - /// Returns true if and only if this match is empty. That is, when - /// `start() == end()`. - /// - /// An empty match can only be returned when the empty string was among - /// the patterns used to build the Aho-Corasick automaton. - #[inline] - pub fn is_empty(&self) -> bool { - self.len == 0 - } - - #[inline] - fn increment(&self, by: usize) -> Match { - Match { pattern: self.pattern, len: self.len, end: self.end + by } - } - - #[inline] - fn from_span(id: usize, start: usize, end: usize) -> Match { - Match { pattern: id, len: end - start, end } - } -} diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/nfa.rs libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/nfa.rs --- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/nfa.rs 2022-11-23 18:55:06.000000000 +0000 +++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/nfa.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,1214 +0,0 @@ -use std::cmp; -use std::collections::{BTreeSet, VecDeque}; -use std::fmt; -use std::mem::size_of; -use std::ops::{Index, IndexMut}; - -use crate::ahocorasick::MatchKind; -use crate::automaton::Automaton; -use crate::classes::{ByteClassBuilder, ByteClasses}; -use crate::error::Result; -use crate::prefilter::{self, opposite_ascii_case, Prefilter, PrefilterObj}; -use crate::state_id::{dead_id, fail_id, usize_to_state_id, StateID}; -use crate::Match; - -/// The identifier for a pattern, which is simply the position of the pattern -/// in the sequence of patterns given by the caller. -pub type PatternID = usize; - -/// The length of a pattern, in bytes. -pub type PatternLength = usize; - -/// An Aho-Corasick automaton, represented as an NFA. 
-/// -/// This is the classical formulation of Aho-Corasick, which involves building -/// up a prefix trie of a given set of patterns, and then wiring up failure -/// transitions between states in order to guarantee linear time matching. The -/// standard formulation is, technically, an NFA because of these failure -/// transitions. That is, one can see them as enabling the automaton to be in -/// multiple states at once. Indeed, during search, it is possible to check -/// the transitions on multiple states for a single input byte. -/// -/// This particular implementation not only supports the standard style of -/// matching, but also provides a mode for choosing leftmost-first or -/// leftmost-longest match semantics. When a leftmost mode is chosen, some -/// failure transitions that would otherwise be added are elided. See -/// the documentation of `MatchKind` for more details and examples on how the -/// match semantics may differ. -/// -/// If one wants a DFA, then it is necessary to first build an NFA and convert -/// it into a DFA. Note, however, that because we've constrained ourselves to -/// matching literal patterns, this does not need to use subset construction -/// for determinization. Instead, the DFA has at most a number of states -/// equivalent to the number of NFA states. The only real difference between -/// them is that all failure transitions are followed and pre-computed. This -/// uses much more memory, but also executes searches more quickly. -#[derive(Clone)] -pub struct NFA { - /// The match semantics built into this NFA. - match_kind: MatchKind, - /// The start state id as an index into `states`. - start_id: S, - /// The length, in bytes, of the longest pattern in this automaton. This - /// information is useful for keeping correct buffer sizes when searching - /// on streams. - max_pattern_len: usize, - /// The total number of patterns added to this automaton, including - /// patterns that may never be matched. - pattern_count: usize, - /// The number of bytes of heap used by this NFA's transition table. - heap_bytes: usize, - /// A prefilter for quickly skipping to candidate matches, if pertinent. - prefilter: Option, - /// Whether this automaton anchors all matches to the start of input. - anchored: bool, - /// A set of equivalence classes in terms of bytes. We compute this while - /// building the NFA, but don't use it in the NFA's states. Instead, we - /// use this for building the DFA. We store it on the NFA since it's easy - /// to compute while visiting the patterns. - byte_classes: ByteClasses, - /// A set of states. Each state defines its own transitions, a fail - /// transition and a set of indices corresponding to matches. - /// - /// The first state is always the fail state, which is used only as a - /// sentinel. Namely, in the final NFA, no transition into the fail state - /// exists. (Well, they do, but they aren't followed. Instead, the state's - /// failure transition is followed.) - /// - /// The second state (index 1) is always the dead state. Dead states are - /// in every automaton, but only used when leftmost-{first,longest} match - /// semantics are enabled. Specifically, they instruct search to stop - /// at specific points in order to report the correct match location. In - /// the standard Aho-Corasick construction, there are no transitions to - /// the dead state. - /// - /// The third state (index 2) is generally intended to be the starting or - /// "root" state. 
- states: Vec>, -} - -impl NFA { - /// Returns the equivalence classes of bytes found while constructing - /// this NFA. - /// - /// Note that the NFA doesn't actually make use of these equivalence - /// classes. Instead, these are useful for building the DFA when desired. - pub fn byte_classes(&self) -> &ByteClasses { - &self.byte_classes - } - - /// Returns a prefilter, if one exists. - pub fn prefilter_obj(&self) -> Option<&PrefilterObj> { - self.prefilter.as_ref() - } - - /// Returns the total number of heap bytes used by this NFA's transition - /// table. - pub fn heap_bytes(&self) -> usize { - self.heap_bytes - + self.prefilter.as_ref().map_or(0, |p| p.as_ref().heap_bytes()) - } - - /// Return the length of the longest pattern in this automaton. - pub fn max_pattern_len(&self) -> usize { - self.max_pattern_len - } - - /// Return the total number of patterns added to this automaton. - pub fn pattern_count(&self) -> usize { - self.pattern_count - } - - /// Returns the total number of states in this NFA. - pub fn state_len(&self) -> usize { - self.states.len() - } - - /// Returns the matches for the given state. - pub fn matches(&self, id: S) -> &[(PatternID, PatternLength)] { - &self.states[id.to_usize()].matches - } - - /// Returns an iterator over all transitions in the given state according - /// to the given equivalence classes, including transitions to `fail_id()`. - /// The number of transitions returned is always equivalent to the number - /// of equivalence classes. - pub fn iter_all_transitions( - &self, - byte_classes: &ByteClasses, - id: S, - f: F, - ) { - self.states[id.to_usize()].trans.iter_all(byte_classes, f); - } - - /// Returns the failure transition for the given state. - pub fn failure_transition(&self, id: S) -> S { - self.states[id.to_usize()].fail - } - - /// Returns the next state for the given state and input byte. - /// - /// Note that this does not follow failure transitions. As such, the id - /// returned may be `fail_id`. - pub fn next_state(&self, current: S, input: u8) -> S { - self.states[current.to_usize()].next_state(input) - } - - fn state(&self, id: S) -> &State { - &self.states[id.to_usize()] - } - - fn state_mut(&mut self, id: S) -> &mut State { - &mut self.states[id.to_usize()] - } - - fn start(&self) -> &State { - self.state(self.start_id) - } - - fn start_mut(&mut self) -> &mut State { - let id = self.start_id; - self.state_mut(id) - } - - fn iter_transitions_mut(&mut self, id: S) -> IterTransitionsMut<'_, S> { - IterTransitionsMut::new(self, id) - } - - fn copy_matches(&mut self, src: S, dst: S) { - let (src, dst) = - get_two_mut(&mut self.states, src.to_usize(), dst.to_usize()); - dst.matches.extend_from_slice(&src.matches); - } - - fn copy_empty_matches(&mut self, dst: S) { - let start_id = self.start_id; - self.copy_matches(start_id, dst); - } - - fn add_dense_state(&mut self, depth: usize) -> Result { - let trans = Transitions::Dense(Dense::new()); - let id = usize_to_state_id(self.states.len())?; - self.states.push(State { - trans, - // Anchored automatons do not have any failure transitions. - fail: if self.anchored { dead_id() } else { self.start_id }, - depth, - matches: vec![], - }); - Ok(id) - } - - fn add_sparse_state(&mut self, depth: usize) -> Result { - let trans = Transitions::Sparse(vec![]); - let id = usize_to_state_id(self.states.len())?; - self.states.push(State { - trans, - // Anchored automatons do not have any failure transitions. 
- fail: if self.anchored { dead_id() } else { self.start_id }, - depth, - matches: vec![], - }); - Ok(id) - } -} - -impl Automaton for NFA { - type ID = S; - - fn match_kind(&self) -> &MatchKind { - &self.match_kind - } - - fn anchored(&self) -> bool { - self.anchored - } - - fn prefilter(&self) -> Option<&dyn Prefilter> { - self.prefilter.as_ref().map(|p| p.as_ref()) - } - - fn start_state(&self) -> S { - self.start_id - } - - fn is_valid(&self, id: S) -> bool { - id.to_usize() < self.states.len() - } - - fn is_match_state(&self, id: S) -> bool { - self.states[id.to_usize()].is_match() - } - - fn get_match( - &self, - id: S, - match_index: usize, - end: usize, - ) -> Option { - let state = match self.states.get(id.to_usize()) { - None => return None, - Some(state) => state, - }; - state.matches.get(match_index).map(|&(id, len)| Match { - pattern: id, - len, - end, - }) - } - - fn match_count(&self, id: S) -> usize { - self.states[id.to_usize()].matches.len() - } - - fn next_state(&self, mut current: S, input: u8) -> S { - // This terminates since: - // - // 1. `State.fail` never points to fail_id(). - // 2. All `State.fail` values point to a state closer to `start`. - // 3. The start state has no transitions to fail_id(). - loop { - let state = &self.states[current.to_usize()]; - let next = state.next_state(input); - if next != fail_id() { - return next; - } - current = state.fail; - } - } -} - -/// A representation of an NFA state for an Aho-Corasick automaton. -/// -/// It contains the transitions to the next state, a failure transition for -/// cases where there exists no other transition for the current input byte, -/// the matches implied by visiting this state (if any) and the depth of this -/// state. The depth of a state is simply the distance from it to the start -/// state in the automaton, where the depth of the start state is 0. -#[derive(Clone, Debug)] -pub struct State { - trans: Transitions, - fail: S, - matches: Vec<(PatternID, PatternLength)>, - // TODO: Strictly speaking, this isn't needed for searching. It's only - // used when building an NFA that supports leftmost match semantics. We - // could drop this from the state and dynamically build a map only when - // computing failure transitions, but it's not clear which is better. - // Benchmark this. - depth: usize, -} - -impl State { - fn heap_bytes(&self) -> usize { - self.trans.heap_bytes() - + (self.matches.len() * size_of::<(PatternID, PatternLength)>()) - } - - fn add_match(&mut self, i: PatternID, len: PatternLength) { - self.matches.push((i, len)); - } - - fn is_match(&self) -> bool { - !self.matches.is_empty() - } - - fn next_state(&self, input: u8) -> S { - self.trans.next_state(input) - } - - fn set_next_state(&mut self, input: u8, next: S) { - self.trans.set_next_state(input, next); - } -} - -/// Represents the transitions for a single dense state. -/// -/// The primary purpose here is to encapsulate index access. Namely, since a -/// dense representation always contains 256 elements, all values of `u8` are -/// valid indices. -#[derive(Clone, Debug)] -struct Dense(Vec); - -impl Dense -where - S: StateID, -{ - fn new() -> Self { - Dense(vec![fail_id(); 256]) - } - - #[inline] - fn len(&self) -> usize { - self.0.len() - } -} - -impl Index for Dense { - type Output = S; - - #[inline] - fn index(&self, i: u8) -> &S { - // SAFETY: This is safe because all dense transitions have - // exactly 256 elements, so all u8 values are valid indices. 
- &self.0[i as usize] - } -} - -impl IndexMut for Dense { - #[inline] - fn index_mut(&mut self, i: u8) -> &mut S { - // SAFETY: This is safe because all dense transitions have - // exactly 256 elements, so all u8 values are valid indices. - &mut self.0[i as usize] - } -} - -/// A representation of transitions in an NFA. -/// -/// Transitions have either a sparse representation, which is slower for -/// lookups but uses less memory, or a dense representation, which is faster -/// for lookups but uses more memory. In the sparse representation, the absence -/// of a state implies a transition to `fail_id()`. Transitions to `dead_id()` -/// are still explicitly represented. -/// -/// For the NFA, by default, we use a dense representation for transitions for -/// states close to the start state because it's likely these are the states -/// that will be most frequently visited. -#[derive(Clone, Debug)] -enum Transitions { - Sparse(Vec<(u8, S)>), - Dense(Dense), -} - -impl Transitions { - fn heap_bytes(&self) -> usize { - match *self { - Transitions::Sparse(ref sparse) => { - sparse.len() * size_of::<(u8, S)>() - } - Transitions::Dense(ref dense) => dense.len() * size_of::(), - } - } - - fn next_state(&self, input: u8) -> S { - match *self { - Transitions::Sparse(ref sparse) => { - for &(b, id) in sparse { - if b == input { - return id; - } - } - fail_id() - } - Transitions::Dense(ref dense) => dense[input], - } - } - - fn set_next_state(&mut self, input: u8, next: S) { - match *self { - Transitions::Sparse(ref mut sparse) => { - match sparse.binary_search_by_key(&input, |&(b, _)| b) { - Ok(i) => sparse[i] = (input, next), - Err(i) => sparse.insert(i, (input, next)), - } - } - Transitions::Dense(ref mut dense) => { - dense[input] = next; - } - } - } - - /// Iterate over transitions in this state while skipping over transitions - /// to `fail_id()`. - fn iter(&self, mut f: F) { - match *self { - Transitions::Sparse(ref sparse) => { - for &(b, id) in sparse { - f(b, id); - } - } - Transitions::Dense(ref dense) => { - for b in AllBytesIter::new() { - let id = dense[b]; - if id != fail_id() { - f(b, id); - } - } - } - } - } - - /// Iterate over all transitions in this state according to the given - /// equivalence classes, including transitions to `fail_id()`. - fn iter_all(&self, classes: &ByteClasses, mut f: F) { - if classes.is_singleton() { - match *self { - Transitions::Sparse(ref sparse) => { - sparse_iter(sparse, f); - } - Transitions::Dense(ref dense) => { - for b in AllBytesIter::new() { - f(b, dense[b]); - } - } - } - } else { - // In this case, we only want to yield a single byte for each - // equivalence class. - match *self { - Transitions::Sparse(ref sparse) => { - let mut last_class = None; - sparse_iter(sparse, |b, next| { - let class = classes.get(b); - if last_class != Some(class) { - last_class = Some(class); - f(b, next); - } - }) - } - Transitions::Dense(ref dense) => { - for b in classes.representatives() { - f(b, dense[b]); - } - } - } - } - } -} - -/// Iterator over transitions in a state, skipping transitions to `fail_id()`. -/// -/// This abstracts over the representation of NFA transitions, which may be -/// either in a sparse or dense representation. -/// -/// This somewhat idiosyncratically borrows the NFA mutably, so that when one -/// is iterating over transitions, the caller can still mutate the NFA. This -/// is useful when creating failure transitions. 
-#[derive(Debug)] -struct IterTransitionsMut<'a, S: StateID> { - nfa: &'a mut NFA<S>, - state_id: S, - cur: usize, -} - -impl<'a, S: StateID> IterTransitionsMut<'a, S> { - fn new(nfa: &'a mut NFA<S>, state_id: S) -> IterTransitionsMut<'a, S> { - IterTransitionsMut { nfa, state_id, cur: 0 } - } - - fn nfa(&mut self) -> &mut NFA<S> { - self.nfa - } -} - -impl<'a, S: StateID> Iterator for IterTransitionsMut<'a, S> { - type Item = (u8, S); - - fn next(&mut self) -> Option<(u8, S)> { - match self.nfa.states[self.state_id.to_usize()].trans { - Transitions::Sparse(ref sparse) => { - if self.cur >= sparse.len() { - return None; - } - let i = self.cur; - self.cur += 1; - Some(sparse[i]) - } - Transitions::Dense(ref dense) => { - while self.cur < dense.len() { - // There are always exactly 256 transitions in dense repr. - debug_assert!(self.cur < 256); - - let b = self.cur as u8; - let id = dense[b]; - self.cur += 1; - if id != fail_id() { - return Some((b, id)); - } - } - None - } - } - } -} - -/// A simple builder for configuring the NFA construction of Aho-Corasick. -#[derive(Clone, Debug)] -pub struct Builder { - dense_depth: usize, - match_kind: MatchKind, - prefilter: bool, - anchored: bool, - ascii_case_insensitive: bool, -} - -impl Default for Builder { - fn default() -> Builder { - Builder { - dense_depth: 2, - match_kind: MatchKind::default(), - prefilter: true, - anchored: false, - ascii_case_insensitive: false, - } - } -} - -impl Builder { - pub fn new() -> Builder { - Builder::default() - } - - pub fn build<I, P, S: StateID>(&self, patterns: I) -> Result<NFA<S>> - where - I: IntoIterator<Item = P>, - P: AsRef<[u8]>, - { - Compiler::new(self)?.compile(patterns) - } - - pub fn match_kind(&mut self, kind: MatchKind) -> &mut Builder { - self.match_kind = kind; - self - } - - pub fn dense_depth(&mut self, depth: usize) -> &mut Builder { - self.dense_depth = depth; - self - } - - pub fn prefilter(&mut self, yes: bool) -> &mut Builder { - self.prefilter = yes; - self - } - - pub fn anchored(&mut self, yes: bool) -> &mut Builder { - self.anchored = yes; - self - } - - pub fn ascii_case_insensitive(&mut self, yes: bool) -> &mut Builder { - self.ascii_case_insensitive = yes; - self - } -} - -/// A compiler uses a builder configuration and builds up the NFA formulation -/// of an Aho-Corasick automaton. This roughly corresponds to the standard -/// formulation described in textbooks.
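A compact sketch of the textbook breadth-first failure computation that `fill_failure_transitions` below implements, with simplified indices, no leftmost handling, and state 0 doubling as both the root and "no transition":

```rust
use std::collections::VecDeque;

struct TrieState {
    next: [usize; 256], // 0 doubles as "no transition" and as the root
    fail: usize,
}

fn fill_failure_transitions(states: &mut Vec<TrieState>) {
    let mut queue = VecDeque::new();
    // Depth-1 states always fail back to the root.
    for b in 0..256 {
        let s = states[0].next[b];
        if s != 0 {
            states[s].fail = 0;
            queue.push_back(s);
        }
    }
    // Breadth-first: parents are finalized before their children.
    while let Some(id) = queue.pop_front() {
        for b in 0..256 {
            let next = states[id].next[b];
            if next == 0 {
                continue;
            }
            // Walk the parent's failure chain until some state has a
            // transition on `b`; the root implicitly loops to itself.
            let mut fail = states[id].fail;
            while states[fail].next[b] == 0 && fail != 0 {
                fail = states[fail].fail;
            }
            let f = states[fail].next[b]; // may be 0, i.e. the root
            states[next].fail = f;
            queue.push_back(next);
        }
    }
}

fn main() {
    // Trie for the patterns "ab" and "b": 0 -a-> 1 -b-> 2, 0 -b-> 3.
    let mut states: Vec<TrieState> = (0..4)
        .map(|_| TrieState { next: [0; 256], fail: 0 })
        .collect();
    states[0].next[b'a' as usize] = 1;
    states[1].next[b'b' as usize] = 2;
    states[0].next[b'b' as usize] = 3;
    fill_failure_transitions(&mut states);
    // Failing out of "ab" resumes at the state for the suffix "b".
    assert_eq!(states[2].fail, 3);
}
```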
-#[derive(Debug)] -struct Compiler<'a, S: StateID> { - builder: &'a Builder, - prefilter: prefilter::Builder, - nfa: NFA, - byte_classes: ByteClassBuilder, -} - -impl<'a, S: StateID> Compiler<'a, S> { - fn new(builder: &'a Builder) -> Result> { - Ok(Compiler { - builder, - prefilter: prefilter::Builder::new(builder.match_kind) - .ascii_case_insensitive(builder.ascii_case_insensitive), - nfa: NFA { - match_kind: builder.match_kind, - start_id: usize_to_state_id(2)?, - max_pattern_len: 0, - pattern_count: 0, - heap_bytes: 0, - prefilter: None, - anchored: builder.anchored, - byte_classes: ByteClasses::singletons(), - states: vec![], - }, - byte_classes: ByteClassBuilder::new(), - }) - } - - fn compile(mut self, patterns: I) -> Result> - where - I: IntoIterator, - P: AsRef<[u8]>, - { - self.add_state(0)?; // the fail state, which is never entered - self.add_state(0)?; // the dead state, only used for leftmost - self.add_state(0)?; // the start state - self.build_trie(patterns)?; - self.add_start_state_loop(); - self.add_dead_state_loop(); - if !self.builder.anchored { - self.fill_failure_transitions(); - } - self.close_start_state_loop(); - self.nfa.byte_classes = self.byte_classes.build(); - if !self.builder.anchored { - self.nfa.prefilter = self.prefilter.build(); - } - self.calculate_size(); - Ok(self.nfa) - } - - /// This sets up the initial prefix trie that makes up the Aho-Corasick - /// automaton. Effectively, it creates the basic structure of the - /// automaton, where every pattern given has a path from the start state to - /// the end of the pattern. - fn build_trie(&mut self, patterns: I) -> Result<()> - where - I: IntoIterator, - P: AsRef<[u8]>, - { - 'PATTERNS: for (pati, pat) in patterns.into_iter().enumerate() { - let pat = pat.as_ref(); - self.nfa.max_pattern_len = - cmp::max(self.nfa.max_pattern_len, pat.len()); - self.nfa.pattern_count += 1; - - let mut prev = self.nfa.start_id; - let mut saw_match = false; - for (depth, &b) in pat.iter().enumerate() { - // When leftmost-first match semantics are requested, we - // specifically stop adding patterns when a previously added - // pattern is a prefix of it. We avoid adding it because - // leftmost-first semantics imply that the pattern can never - // match. This is not just an optimization to save space! It - // is necessary for correctness. In fact, this is the only - // difference in the automaton between the implementations for - // leftmost-first and leftmost-longest. - saw_match = saw_match || self.nfa.state(prev).is_match(); - if self.builder.match_kind.is_leftmost_first() && saw_match { - // Skip to the next pattern immediately. This avoids - // incorrectly adding a match after this loop terminates. - continue 'PATTERNS; - } - - // Add this byte to our equivalence classes. We don't use these - // for NFA construction. These are instead used only if we're - // building a DFA. They would technically be useful for the - // NFA, but it would require a second pass over the patterns. - self.byte_classes.set_range(b, b); - if self.builder.ascii_case_insensitive { - let b = opposite_ascii_case(b); - self.byte_classes.set_range(b, b); - } - - // If the transition from prev using the current byte already - // exists, then just move through it. Otherwise, add a new - // state. We track the depth here so that we can determine - // how to represent transitions. States near the start state - // use a dense representation that uses more memory but is - // faster. 
Other states use a sparse representation that uses - // less memory but is slower. - let next = self.nfa.state(prev).next_state(b); - if next != fail_id() { - prev = next; - } else { - let next = self.add_state(depth + 1)?; - self.nfa.state_mut(prev).set_next_state(b, next); - if self.builder.ascii_case_insensitive { - let b = opposite_ascii_case(b); - self.nfa.state_mut(prev).set_next_state(b, next); - } - prev = next; - } - } - // Once the pattern has been added, log the match in the final - // state that it reached. - self.nfa.state_mut(prev).add_match(pati, pat.len()); - // ... and hand it to the prefilter builder, if applicable. - if self.builder.prefilter { - self.prefilter.add(pat); - } - } - Ok(()) - } - - /// This routine creates failure transitions according to the standard - /// textbook formulation of the Aho-Corasick algorithm, with a couple small - /// tweaks to support "leftmost" semantics. - /// - /// Building failure transitions is the most interesting part of building - /// the Aho-Corasick automaton, because they are what allow searches to - /// be performed in linear time. Specifically, a failure transition is - /// a single transition associated with each state that points back to - /// the longest proper suffix of the pattern being searched. The failure - /// transition is followed whenever there exists no transition on the - /// current state for the current input byte. If there is no other proper - /// suffix, then the failure transition points back to the starting state. - /// - /// For example, let's say we built an Aho-Corasick automaton with the - /// following patterns: 'abcd' and 'cef'. The trie looks like this: - /// - /// ```ignore - /// a - S1 - b - S2 - c - S3 - d - S4* - /// / - /// S0 - c - S5 - e - S6 - f - S7* - /// ``` - /// - /// At this point, it should be fairly straightforward to see how this - /// trie can be used in a simplistic way. At any given position in the - /// text we're searching (called the "subject" string), all we need to do - /// is follow the transitions in the trie by consuming one transition for - /// each byte in the subject string. If we reach a match state, then we can - /// report that location as a match. - /// - /// The trick comes when searching a subject string like 'abcef'. We'll - /// initially follow the transition from S0 to S1 and wind up in S3 after - /// observing the 'c' byte. At this point, the next byte is 'e' but state - /// S3 has no transition for 'e', so the search fails. We then would need - /// to restart the search at the next position in 'abcef', which - /// corresponds to 'b'. The match would fail, but the next search starting - /// at 'c' would finally succeed. The problem with this approach is that - /// we wind up searching the subject string potentially many times. In - /// effect, this makes the algorithm have worst case `O(n * m)` complexity, - /// where `n ~ len(subject)` and `m ~ len(all patterns)`. We would instead - /// like to achieve a `O(n + m)` worst case complexity. - /// - /// This is where failure transitions come in. Instead of dying at S3 in - /// the first search, the automaton can instruct the search to move to - /// another part of the automaton that corresponds to a suffix of what - /// we've seen so far. Recall that we've seen 'abc' in the subject string, - /// and the automaton does indeed have a non-empty suffix, 'c', that could - /// potentially lead to another match.
Thus, the actual Aho-Corasick - /// automaton for our patterns in this case looks like this: - /// - /// ```ignore - /// a - S1 - b - S2 - c - S3 - d - S4* - /// / / - /// / ---------------- - /// / / - /// S0 - c - S5 - e - S6 - f - S7* - /// ``` - /// - /// That is, we have a failure transition from S3 to S5, which is followed - /// exactly in cases when we are in state S3 but see any byte other than - /// 'd' (that is, we've "failed" to find a match in this portion of our - /// trie). We know we can transition back to S5 because we've already seen - /// a 'c' byte, so we don't need to re-scan it. We can then pick back up - /// with the search starting at S5 and complete our match. - /// - /// Adding failure transitions to a trie is fairly simple, but subtle. The - /// key issue is that you might have multiple failure transitions that you - /// need to follow. For example, look at the trie for the patterns - /// 'abcd', 'b', 'bcd' and 'cd': - /// - /// ```ignore - /// - a - S1 - b - S2* - c - S3 - d - S4* - /// / / / - /// / ------- ------- - /// / / / - /// S0 --- b - S5* - c - S6 - d - S7* - /// \ / - /// \ -------- - /// \ / - /// - c - S8 - d - S9* - /// ``` - /// - /// The failure transitions for this trie are defined from S2 to S5, - /// S3 to S6 and S6 to S8. Moreover, state S2 needs to track that it - /// corresponds to a match, since its failure transition leads to S5, - /// which is itself a match state. - /// - /// Perhaps the simplest way to think about adding these failure transitions - /// is recursively. That is, if you know the failure transitions for every - /// possible previous state that could be visited (e.g., when computing the - /// failure transition for S3, you already know the failure transitions - /// for S0, S1 and S2), then you can simply follow the failure transition - /// of the previous state and check whether the incoming transition is - /// defined after following the failure transition. - /// - /// For example, when determining the failure state for S3, by our - /// assumptions, we already know that there is a failure transition from - /// S2 (the previous state) to S5. So we follow that transition and check - /// whether the transition connecting S2 to S3 is defined. Indeed, it is, - /// as there is a transition from S5 to S6 for the byte 'c'. If no such - /// transition existed, we could keep following the failure transitions - /// until we reach the start state, which is the failure transition for - /// every state that has no corresponding proper suffix. - /// - /// We don't actually use recursion to implement this, but instead, use a - /// breadth first search of the automaton. Our base case is the start - /// state, whose failure transition is just a transition to itself. - /// - /// When building a leftmost automaton, we proceed as above, but only - /// include a subset of failure transitions. Namely, we omit any failure - /// transitions that appear after a match state in the trie. This is - /// because failure transitions always point back to a proper suffix of - /// what has been seen so far. Thus, following a failure transition after - /// a match implies looking for a match that starts after the one that has - /// already been seen, which is of course therefore not the leftmost match. - /// - /// N.B. I came up with this algorithm on my own, and after scouring all of - /// the other AC implementations I know of (Perl, Snort, many on GitHub). - /// I couldn't find any that implement leftmost semantics like this.
- /// Perl of course needs leftmost-first semantics, but they implement it - /// with a seeming hack at *search* time instead of encoding it into the - /// automaton. There are also a couple Java libraries that support leftmost - /// longest semantics, but they do it by building a queue of matches at - /// search time, which is even worse than what Perl is doing. ---AG - fn fill_failure_transitions(&mut self) { - let kind = self.match_kind(); - // Initialize the queue for breadth first search with all transitions - // out of the start state. We handle the start state specially because - // we only want to follow non-self transitions. If we followed self - // transitions, then this would never terminate. - let mut queue = VecDeque::new(); - let mut seen = self.queued_set(); - let mut it = self.nfa.iter_transitions_mut(self.nfa.start_id); - while let Some((_, next)) = it.next() { - // Skip anything we've seen before and any self-transitions on the - // start state. - if next == it.nfa().start_id || seen.contains(next) { - continue; - } - queue.push_back(next); - seen.insert(next); - // Under leftmost semantics, if a state immediately following - // the start state is a match state, then we never want to - // follow its failure transition since the failure transition - // necessarily leads back to the start state, which we never - // want to do for leftmost matching after a match has been - // found. - // - // We apply the same logic to non-start states below as well. - if kind.is_leftmost() && it.nfa().state(next).is_match() { - it.nfa().state_mut(next).fail = dead_id(); - } - } - while let Some(id) = queue.pop_front() { - let mut it = self.nfa.iter_transitions_mut(id); - while let Some((b, next)) = it.next() { - if seen.contains(next) { - // The only way to visit a duplicate state in a transition - // list is when ASCII case insensitivity is enabled. In - // this case, we want to skip it since it's redundant work. - // But it would also end up duplicating matches, which - // results in reporting duplicate matches in some cases. - // See the 'acasei010' regression test. - continue; - } - queue.push_back(next); - seen.insert(next); - - // As above for start states, under leftmost semantics, once - // we see a match all subsequent states should have no failure - // transitions because failure transitions always imply looking - // for a match that is a suffix of what has been seen so far - // (where "seen so far" corresponds to the string formed by - // following the transitions from the start state to the - // current state). Under leftmost semantics, we specifically do - // not want to allow this to happen because we always want to - // report the match found at the leftmost position. - // - // The difference between leftmost-first and leftmost-longest - // occurs previously while we build the trie. For - // leftmost-first, we simply omit any entries that would - // otherwise require passing through a match state. - // - // Note that for correctness, the failure transition has to be - // set to the dead state for ALL states following a match, not - // just the match state itself. However, by setting the failure - // transition to the dead state on all match states, the dead - // state will automatically propagate to all subsequent states - // via the failure state computation below. 
- if kind.is_leftmost() && it.nfa().state(next).is_match() { - it.nfa().state_mut(next).fail = dead_id(); - continue; - } - let mut fail = it.nfa().state(id).fail; - while it.nfa().state(fail).next_state(b) == fail_id() { - fail = it.nfa().state(fail).fail; - } - fail = it.nfa().state(fail).next_state(b); - it.nfa().state_mut(next).fail = fail; - it.nfa().copy_matches(fail, next); - } - // If the start state is a match state, then this automaton can - // match the empty string. This implies all states are match states - // since every position matches the empty string, so copy the - // matches from the start state to every state. Strictly speaking, - // this is only necessary for overlapping matches since each - // non-empty non-start match state needs to report empty matches - // in addition to its own. For the non-overlapping case, such - // states only report the first match, which is never empty since - // it isn't a start state. - if !kind.is_leftmost() { - it.nfa().copy_empty_matches(id); - } - } - } - - /// Returns a set that tracks queued states. - /// - /// This is only necessary when ASCII case insensitivity is enabled, since - /// it is the only way to visit the same state twice. Otherwise, this - /// returns an inert set that never adds anything and always reports - /// `false` for every member test. - fn queued_set(&self) -> QueuedSet<S> { - if self.builder.ascii_case_insensitive { - QueuedSet::active() - } else { - QueuedSet::inert() - } - } - - /// Set the failure transitions on the start state to loop back to the - /// start state. This effectively permits the Aho-Corasick automaton to - /// match at any position. This is also required for finding the next - /// state to terminate; namely, finding the next state should never return - /// a fail_id. - /// - /// This must be done after building the initial trie, since trie - /// construction depends on transitions to `fail_id` to determine whether a - /// state already exists or not. - fn add_start_state_loop(&mut self) { - let start_id = self.nfa.start_id; - let start = self.nfa.start_mut(); - for b in AllBytesIter::new() { - if start.next_state(b) == fail_id() { - start.set_next_state(b, start_id); - } - } - } - - /// Remove the start state loop by rewriting any transitions on the start - /// state back to the start state with transitions to the dead state. - /// - /// The loop is only closed when two conditions are met: the start state - /// is a match state and the match kind is leftmost-first or - /// leftmost-longest. (Alternatively, if this is an anchored automaton, - /// then the start state is always closed, regardless of the aforementioned - /// conditions.) - /// - /// The reason for this is that under leftmost semantics, a start state - /// that is also a match implies that we should never restart the search - /// process. We allow normal transitions out of the start state, but if - /// none exist, we transition to the dead state, which signals that - /// searching should stop. - fn close_start_state_loop(&mut self) { - if self.builder.anchored - || (self.match_kind().is_leftmost() && self.nfa.start().is_match()) - { - let start_id = self.nfa.start_id; - let start = self.nfa.start_mut(); - for b in AllBytesIter::new() { - if start.next_state(b) == start_id { - start.set_next_state(b, dead_id()); - } - } - } - } - - /// Sets all transitions on the dead state to point back to the dead state.
- /// Normally, missing transitions map back to the failure state, but the - /// point of the dead state is to act as a sink that can never be escaped. - fn add_dead_state_loop(&mut self) { - let dead = self.nfa.state_mut(dead_id()); - for b in AllBytesIter::new() { - dead.set_next_state(b, dead_id()); - } - } - - /// Computes the total amount of heap used by this NFA in bytes. - fn calculate_size(&mut self) { - let mut size = 0; - for state in &self.nfa.states { - size += size_of::<State<S>>() + state.heap_bytes(); - } - self.nfa.heap_bytes = size; - } - - /// Add a new state to the underlying NFA with the given depth. The depth - /// is used to determine how to represent the transitions. - /// - /// If adding the new state would overflow the chosen state ID - /// representation, then this returns an error. - fn add_state(&mut self, depth: usize) -> Result<S> { - if depth < self.builder.dense_depth { - self.nfa.add_dense_state(depth) - } else { - self.nfa.add_sparse_state(depth) - } - } - - /// Returns the match kind configured on the underlying builder. - fn match_kind(&self) -> MatchKind { - self.builder.match_kind - } -} - -/// A set of state identifiers used to avoid revisiting the same state multiple -/// times when filling in failure transitions. -/// -/// This set has an "inert" and an "active" mode. When inert, the set never -/// stores anything and always returns `false` for every member test. This is -/// useful to avoid the performance and memory overhead of maintaining this -/// set when it is not needed. -#[derive(Debug)] -struct QueuedSet<S> { - set: Option<BTreeSet<S>>, -} - -impl<S: StateID> QueuedSet<S> { - /// Return an inert set that returns `false` for every state ID membership - /// test. - fn inert() -> QueuedSet<S> { - QueuedSet { set: None } - } - - /// Return an active set that tracks state ID membership. - fn active() -> QueuedSet<S> { - QueuedSet { set: Some(BTreeSet::new()) } - } - - /// Inserts the given state ID into this set. (If the set is inert, then - /// this is a no-op.) - fn insert(&mut self, state_id: S) { - if let Some(ref mut set) = self.set { - set.insert(state_id); - } - } - - /// Returns true if and only if the given state ID is in this set. If the - /// set is inert, this always returns false. - fn contains(&self, state_id: S) -> bool { - match self.set { - None => false, - Some(ref set) => set.contains(&state_id), - } - } -} - -/// An iterator over every byte value. -/// -/// We use this instead of (0..256).map(|b| b as u8) because this optimizes -/// better in debug builds. -/// -/// We also use this instead of 0..=255 because we're targeting Rust 1.24 and -/// inclusive range syntax was stabilized in Rust 1.26. We can get rid of this -/// once our MSRV is Rust 1.26 or newer. -#[derive(Debug)] -struct AllBytesIter(u16); - -impl AllBytesIter { - fn new() -> AllBytesIter { - AllBytesIter(0) - } -} - -impl Iterator for AllBytesIter { - type Item = u8; - - fn next(&mut self) -> Option<u8> { - if self.0 >= 256 { - None - } else { - let b = self.0 as u8; - self.0 += 1; - Some(b) - } - } -} - -impl<S: StateID> fmt::Debug for NFA<S> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - writeln!(f, "NFA(")?; - writeln!(f, "match_kind: {:?}", self.match_kind)?; - writeln!(f, "prefilter: {:?}", self.prefilter)?; - writeln!(f, "{}", "-".repeat(79))?; - for (id, s) in self.states.iter().enumerate() { - let mut trans = vec![]; - s.trans.iter(|byte, next| { - // The start state has a bunch of uninteresting transitions - // back into itself.
It's questionable to hide them since they - // are critical to understanding the automaton, but they are - // very noisy without better formatting for contiguous ranges - // to the same state. - if id == self.start_id.to_usize() && next == self.start_id { - return; - } - // Similarly, the dead state has a bunch of uninteresting - // transitions too. - if id == dead_id() { - return; - } - trans.push(format!("{} => {}", escape(byte), next.to_usize())); - }); - writeln!(f, "{:04}: {}", id, trans.join(", "))?; - - let matches: Vec<String> = s - .matches - .iter() - .map(|&(pattern_id, _)| pattern_id.to_string()) - .collect(); - writeln!(f, " matches: {}", matches.join(", "))?; - writeln!(f, " fail: {}", s.fail.to_usize())?; - writeln!(f, " depth: {}", s.depth)?; - } - writeln!(f, "{}", "-".repeat(79))?; - writeln!(f, ")")?; - Ok(()) - } -} - -/// Iterate over all possible byte transitions given a sparse set. -fn sparse_iter<S: StateID, F: FnMut(u8, S)>(trans: &[(u8, S)], mut f: F) { - let mut byte = 0u16; - for &(b, id) in trans { - while byte < (b as u16) { - f(byte as u8, fail_id()); - byte += 1; - } - f(b, id); - byte += 1; - } - for b in byte..256 { - f(b as u8, fail_id()); - } -} - -/// Safely return two mutable borrows to two different locations in the given -/// slice. -/// -/// This panics if i == j. -fn get_two_mut<T>(xs: &mut [T], i: usize, j: usize) -> (&mut T, &mut T) { - assert!(i != j, "{} must not be equal to {}", i, j); - if i < j { - let (before, after) = xs.split_at_mut(j); - (&mut before[i], &mut after[0]) - } else { - let (before, after) = xs.split_at_mut(i); - (&mut after[0], &mut before[j]) - } -} - -/// Return the given byte as its escaped string form. -fn escape(b: u8) -> String { - use std::ascii; - - String::from_utf8(ascii::escape_default(b).collect::<Vec<u8>>()).unwrap() -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn scratch() { - let nfa: NFA<usize> = Builder::new() - .dense_depth(0) - // .match_kind(MatchKind::LeftmostShortest) - // .match_kind(MatchKind::LeftmostLongest) - .match_kind(MatchKind::LeftmostFirst) - // .build(&["abcd", "ce", "b"]) - // .build(&["ab", "bc"]) - // .build(&["b", "bcd", "ce"]) - // .build(&["abc", "bx"]) - // .build(&["abc", "bd", "ab"]) - // .build(&["abcdefghi", "hz", "abcdefgh"]) - // .build(&["abcd", "bce", "b"]) - .build(&["abcdefg", "bcde", "bcdef"]) - .unwrap(); - println!("{:?}", nfa); - } -} diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/api.rs libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/api.rs --- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/api.rs 2022-11-23 18:55:06.000000000 +0000 +++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/api.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,625 +0,0 @@ -use std::u16; - -use crate::packed::pattern::Patterns; -use crate::packed::rabinkarp::RabinKarp; -use crate::packed::teddy::{self, Teddy}; -use crate::Match; - -/// This is a limit placed on the total number of patterns we're willing to try -/// and match at once. As more sophisticated algorithms are added, this number -/// may be increased. -const PATTERN_LIMIT: usize = 128; - -/// A knob for controlling the match semantics of a packed multiple string -/// searcher. -/// -/// This differs from the -/// [`MatchKind`](../enum.MatchKind.html) -/// type in the top-level crate module in that it doesn't support -/// "standard" match semantics, and instead only supports leftmost-first or -/// leftmost-longest.
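[Editor's note: the "standard" versus leftmost distinction referenced here is easiest to see through the crate's top-level API. A minimal sketch, assuming the 0.7-era `AhoCorasickBuilder` API that this diff removes; the pattern set is the classic "samwise"/"sam" example.]

```rust
use aho_corasick::{AhoCorasickBuilder, MatchKind};

fn main() {
    let patterns = &["samwise", "sam"];
    let haystack = "samwise";

    // Standard semantics report the first match the automaton finds:
    // pattern 1 ("sam") at 0..3.
    let ac = AhoCorasickBuilder::new()
        .match_kind(MatchKind::Standard)
        .build(patterns);
    let mat = ac.find(haystack).unwrap();
    assert_eq!((1, 0, 3), (mat.pattern(), mat.start(), mat.end()));

    // Leftmost-first semantics prefer the earlier-listed pattern among
    // all leftmost candidates: pattern 0 ("samwise") at 0..7.
    let ac = AhoCorasickBuilder::new()
        .match_kind(MatchKind::LeftmostFirst)
        .build(patterns);
    let mat = ac.find(haystack).unwrap();
    assert_eq!((0, 0, 7), (mat.pattern(), mat.start(), mat.end()));
}
```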
Namely, "standard" semantics cannot be easily supported -/// by packed searchers. -/// -/// For more information on the distinction between leftmost-first and -/// leftmost-longest, see the docs on the top-level `MatchKind` type. -/// -/// Unlike the top-level `MatchKind` type, the default match semantics for this -/// type are leftmost-first. -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub enum MatchKind { - /// Use leftmost-first match semantics, which reports leftmost matches. - /// When there are multiple possible leftmost matches, the match - /// corresponding to the pattern that appeared earlier when constructing - /// the automaton is reported. - /// - /// This is the default. - LeftmostFirst, - /// Use leftmost-longest match semantics, which reports leftmost matches. - /// When there are multiple possible leftmost matches, the longest match - /// is chosen. - LeftmostLongest, - /// Hints that destructuring should not be exhaustive. - /// - /// This enum may grow additional variants, so this makes sure clients - /// don't count on exhaustive matching. (Otherwise, adding a new variant - /// could break existing code.) - #[doc(hidden)] - __Nonexhaustive, -} - -impl Default for MatchKind { - fn default() -> MatchKind { - MatchKind::LeftmostFirst - } -} - -/// The configuration for a packed multiple pattern searcher. -/// -/// The configuration is currently limited only to being able to select the -/// match semantics (leftmost-first or leftmost-longest) of a searcher. In the -/// future, more knobs may be made available. -/// -/// A configuration produces a [`packed::Builder`](struct.Builder.html), which -/// in turn can be used to construct a -/// [`packed::Searcher`](struct.Searcher.html) for searching. -/// -/// # Example -/// -/// This example shows how to use leftmost-longest semantics instead of the -/// default (leftmost-first). -/// -/// ``` -/// use aho_corasick::packed::{Config, MatchKind}; -/// -/// # fn example() -> Option<()> { -/// let searcher = Config::new() -/// .match_kind(MatchKind::LeftmostLongest) -/// .builder() -/// .add("foo") -/// .add("foobar") -/// .build()?; -/// let matches: Vec = searcher -/// .find_iter("foobar") -/// .map(|mat| mat.pattern()) -/// .collect(); -/// assert_eq!(vec![1], matches); -/// # Some(()) } -/// # if cfg!(target_arch = "x86_64") { -/// # example().unwrap() -/// # } else { -/// # assert!(example().is_none()); -/// # } -/// ``` -#[derive(Clone, Debug)] -pub struct Config { - kind: MatchKind, - force: Option, - force_teddy_fat: Option, - force_avx: Option, -} - -/// An internal option for forcing the use of a particular packed algorithm. -/// -/// When an algorithm is forced, if a searcher could not be constructed for it, -/// then no searcher will be returned even if an alternative algorithm would -/// work. -#[derive(Clone, Debug)] -enum ForceAlgorithm { - Teddy, - RabinKarp, -} - -impl Default for Config { - fn default() -> Config { - Config::new() - } -} - -impl Config { - /// Create a new default configuration. A default configuration uses - /// leftmost-first match semantics. - pub fn new() -> Config { - Config { - kind: MatchKind::LeftmostFirst, - force: None, - force_teddy_fat: None, - force_avx: None, - } - } - - /// Create a packed builder from this configuration. The builder can be - /// used to accumulate patterns and create a - /// [`Searcher`](struct.Searcher.html) - /// from them. - pub fn builder(&self) -> Builder { - Builder::from_config(self.clone()) - } - - /// Set the match semantics for this configuration. 
- pub fn match_kind(&mut self, kind: MatchKind) -> &mut Config { - self.kind = kind; - self - } - - /// An undocumented method for forcing the use of the Teddy algorithm. - /// - /// This is only exposed for more precise testing and benchmarks. Callers - /// should not use it as it is not part of the API stability guarantees of - /// this crate. - #[doc(hidden)] - pub fn force_teddy(&mut self, yes: bool) -> &mut Config { - if yes { - self.force = Some(ForceAlgorithm::Teddy); - } else { - self.force = None; - } - self - } - - /// An undocumented method for forcing the use of the Fat Teddy algorithm. - /// - /// This is only exposed for more precise testing and benchmarks. Callers - /// should not use it as it is not part of the API stability guarantees of - /// this crate. - #[doc(hidden)] - pub fn force_teddy_fat(&mut self, yes: Option<bool>) -> &mut Config { - self.force_teddy_fat = yes; - self - } - - /// An undocumented method for forcing the use of SSE (`Some(false)`) or - /// AVX (`Some(true)`) algorithms. - /// - /// This is only exposed for more precise testing and benchmarks. Callers - /// should not use it as it is not part of the API stability guarantees of - /// this crate. - #[doc(hidden)] - pub fn force_avx(&mut self, yes: Option<bool>) -> &mut Config { - self.force_avx = yes; - self - } - - /// An undocumented method for forcing the use of the Rabin-Karp algorithm. - /// - /// This is only exposed for more precise testing and benchmarks. Callers - /// should not use it as it is not part of the API stability guarantees of - /// this crate. - #[doc(hidden)] - pub fn force_rabin_karp(&mut self, yes: bool) -> &mut Config { - if yes { - self.force = Some(ForceAlgorithm::RabinKarp); - } else { - self.force = None; - } - self - } -} - -/// A builder for constructing a packed searcher from a collection of patterns. -/// -/// # Example -/// -/// This example shows how to use a builder to construct a searcher. By -/// default, leftmost-first match semantics are used. -/// -/// ``` -/// use aho_corasick::packed::{Builder, MatchKind}; -/// -/// # fn example() -> Option<()> { -/// let searcher = Builder::new() -/// .add("foobar") -/// .add("foo") -/// .build()?; -/// let matches: Vec<usize> = searcher -/// .find_iter("foobar") -/// .map(|mat| mat.pattern()) -/// .collect(); -/// assert_eq!(vec![0], matches); -/// # Some(()) } -/// # if cfg!(target_arch = "x86_64") { -/// # example().unwrap() -/// # } else { -/// # assert!(example().is_none()); -/// # } -/// ``` -#[derive(Clone, Debug)] -pub struct Builder { - /// The configuration of this builder and subsequent matcher. - config: Config, - /// Set to true if the builder detects that a matcher cannot be built. - inert: bool, - /// The patterns provided by the caller. - patterns: Patterns, -} - -impl Builder { - /// Create a new builder for constructing a multi-pattern searcher. This - /// constructor uses the default configuration. - pub fn new() -> Builder { - Builder::from_config(Config::new()) - } - - fn from_config(config: Config) -> Builder { - Builder { config, inert: false, patterns: Patterns::new() } - } - - /// Build a searcher from the patterns added to this builder so far. - pub fn build(&self) -> Option<Searcher> { - if self.inert || self.patterns.is_empty() { - return None; - } - let mut patterns = self.patterns.clone(); - patterns.set_match_kind(self.config.kind); - let rabinkarp = RabinKarp::new(&patterns); - // Effectively, we only want to return a searcher if we can use Teddy, - // since Teddy is our only fast packed searcher at the moment.
- // Rabin-Karp is only used when searching haystacks smaller than what - // Teddy can support. Thus, the only way to get a Rabin-Karp searcher - // is to force it using undocumented APIs (for tests/benchmarks). - let (search_kind, minimum_len) = match self.config.force { - None | Some(ForceAlgorithm::Teddy) => { - let teddy = match self.build_teddy(&patterns) { - None => return None, - Some(teddy) => teddy, - }; - let minimum_len = teddy.minimum_len(); - (SearchKind::Teddy(teddy), minimum_len) - } - Some(ForceAlgorithm::RabinKarp) => (SearchKind::RabinKarp, 0), - }; - Some(Searcher { patterns, rabinkarp, search_kind, minimum_len }) - } - - fn build_teddy(&self, patterns: &Patterns) -> Option<Teddy> { - teddy::Builder::new() - .avx(self.config.force_avx) - .fat(self.config.force_teddy_fat) - .build(&patterns) - } - - /// Add the given pattern to this set to match. - /// - /// The order in which patterns are added is significant. Namely, when - /// using leftmost-first match semantics and multiple patterns can - /// match at a particular location, the pattern that was added first is - /// used as the match. - /// - /// If the number of patterns added exceeds the number supported by packed - /// searchers, then the builder will stop accumulating patterns and render - /// itself inert. At this point, constructing a searcher will always return - /// `None`. - pub fn add<P: AsRef<[u8]>>(&mut self, pattern: P) -> &mut Builder { - if self.inert { - return self; - } else if self.patterns.len() >= PATTERN_LIMIT { - self.inert = true; - self.patterns.reset(); - return self; - } - // Just in case PATTERN_LIMIT increases beyond u16::MAX. - assert!(self.patterns.len() <= u16::MAX as usize); - - let pattern = pattern.as_ref(); - if pattern.is_empty() { - self.inert = true; - self.patterns.reset(); - return self; - } - self.patterns.add(pattern); - self - } - - /// Add the given iterator of patterns to this set to match. - /// - /// The iterator must yield elements that can be converted into a `&[u8]`. - /// - /// The order in which patterns are added is significant. Namely, when - /// using leftmost-first match semantics and multiple patterns can - /// match at a particular location, the pattern that was added first is - /// used as the match. - /// - /// If the number of patterns added exceeds the number supported by packed - /// searchers, then the builder will stop accumulating patterns and render - /// itself inert. At this point, constructing a searcher will always return - /// `None`. - pub fn extend<I, P>(&mut self, patterns: I) -> &mut Builder - where - I: IntoIterator<Item = P>, - P: AsRef<[u8]>, - { - for p in patterns { - self.add(p); - } - self - } -} - -impl Default for Builder { - fn default() -> Builder { - Builder::new() - } -} - -/// A packed searcher for quickly finding occurrences of multiple patterns. -/// -/// If callers need more flexible construction, or if one wants to change the -/// match semantics (either leftmost-first or leftmost-longest), then one can -/// use the [`Config`](struct.Config.html) and/or -/// [`Builder`](struct.Builder.html) types for more fine grained control. -/// -/// # Example -/// -/// This example shows how to create a searcher from an iterator of patterns. -/// By default, leftmost-first match semantics are used.
-/// -/// ``` -/// use aho_corasick::packed::{MatchKind, Searcher}; -/// -/// # fn example() -> Option<()> { -/// let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?; -/// let matches: Vec<usize> = searcher -/// .find_iter("foobar") -/// .map(|mat| mat.pattern()) -/// .collect(); -/// assert_eq!(vec![0], matches); -/// # Some(()) } -/// # if cfg!(target_arch = "x86_64") { -/// # example().unwrap() -/// # } else { -/// # assert!(example().is_none()); -/// # } -/// ``` -#[derive(Clone, Debug)] -pub struct Searcher { - patterns: Patterns, - rabinkarp: RabinKarp, - search_kind: SearchKind, - minimum_len: usize, -} - -#[derive(Clone, Debug)] -enum SearchKind { - Teddy(Teddy), - RabinKarp, -} - -impl Searcher { - /// A convenience function for constructing a searcher from an iterator - /// of things that can be converted to a `&[u8]`. - /// - /// If a searcher could not be constructed (either because of an - /// unsupported CPU or because there are too many patterns), then `None` - /// is returned. - /// - /// # Example - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::packed::{MatchKind, Searcher}; - /// - /// # fn example() -> Option<()> { - /// let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?; - /// let matches: Vec<usize> = searcher - /// .find_iter("foobar") - /// .map(|mat| mat.pattern()) - /// .collect(); - /// assert_eq!(vec![0], matches); - /// # Some(()) } - /// # if cfg!(target_arch = "x86_64") { - /// # example().unwrap() - /// # } else { - /// # assert!(example().is_none()); - /// # } - /// ``` - pub fn new<I, P>(patterns: I) -> Option<Searcher> - where - I: IntoIterator<Item = P>, - P: AsRef<[u8]>, - { - Builder::new().extend(patterns).build() - } - - /// Return the first occurrence of any of the patterns in this searcher, - /// according to its match semantics, in the given haystack. The `Match` - /// returned will include the identifier of the pattern that matched, which - /// corresponds to the index of the pattern (starting from `0`) at which it - /// was added. - /// - /// # Example - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::packed::{MatchKind, Searcher}; - /// - /// # fn example() -> Option<()> { - /// let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?; - /// let mat = searcher.find("foobar")?; - /// assert_eq!(0, mat.pattern()); - /// assert_eq!(0, mat.start()); - /// assert_eq!(6, mat.end()); - /// # Some(()) } - /// # if cfg!(target_arch = "x86_64") { - /// # example().unwrap() - /// # } else { - /// # assert!(example().is_none()); - /// # } - /// ``` - pub fn find<B: AsRef<[u8]>>(&self, haystack: B) -> Option<Match> { - self.find_at(haystack, 0) - } - - /// Return the first occurrence of any of the patterns in this searcher, - /// according to its match semantics, in the given haystack starting from - /// the given position. - /// - /// The `Match` returned will include the identifier of the pattern that - /// matched, which corresponds to the index of the pattern (starting from - /// `0`) at which it was added. The offsets in the `Match` will be relative - /// to the start of `haystack` (and not `at`).
- /// - /// # Example - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::packed::{MatchKind, Searcher}; - /// - /// # fn example() -> Option<()> { - /// let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?; - /// let mat = searcher.find_at("foofoobar", 3)?; - /// assert_eq!(0, mat.pattern()); - /// assert_eq!(3, mat.start()); - /// assert_eq!(9, mat.end()); - /// # Some(()) } - /// # if cfg!(target_arch = "x86_64") { - /// # example().unwrap() - /// # } else { - /// # assert!(example().is_none()); - /// # } - /// ``` - pub fn find_at<B: AsRef<[u8]>>( - &self, - haystack: B, - at: usize, - ) -> Option<Match> { - let haystack = haystack.as_ref(); - match self.search_kind { - SearchKind::Teddy(ref teddy) => { - if haystack[at..].len() < teddy.minimum_len() { - return self.slow_at(haystack, at); - } - teddy.find_at(&self.patterns, haystack, at) - } - SearchKind::RabinKarp => { - self.rabinkarp.find_at(&self.patterns, haystack, at) - } - } - } - - /// Return an iterator of non-overlapping occurrences of the patterns in - /// this searcher, according to its match semantics, in the given haystack. - /// - /// # Example - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::packed::{MatchKind, Searcher}; - /// - /// # fn example() -> Option<()> { - /// let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?; - /// let matches: Vec<usize> = searcher - /// .find_iter("foobar fooba foofoo") - /// .map(|mat| mat.pattern()) - /// .collect(); - /// assert_eq!(vec![0, 1, 1, 1], matches); - /// # Some(()) } - /// # if cfg!(target_arch = "x86_64") { - /// # example().unwrap() - /// # } else { - /// # assert!(example().is_none()); - /// # } - /// ``` - pub fn find_iter<'a, 'b, B: ?Sized + AsRef<[u8]>>( - &'a self, - haystack: &'b B, - ) -> FindIter<'a, 'b> { - FindIter { searcher: self, haystack: haystack.as_ref(), at: 0 } - } - - /// Returns the match kind used by this packed searcher. - /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::packed::{MatchKind, Searcher}; - /// - /// # fn example() -> Option<()> { - /// let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?; - /// // leftmost-first is the default. - /// assert_eq!(&MatchKind::LeftmostFirst, searcher.match_kind()); - /// # Some(()) } - /// # if cfg!(target_arch = "x86_64") { - /// # example().unwrap() - /// # } else { - /// # assert!(example().is_none()); - /// # } - /// ``` - pub fn match_kind(&self) -> &MatchKind { - self.patterns.match_kind() - } - - /// Returns the minimum length of a haystack that is required in order for - /// packed searching to be effective. - /// - /// In some cases, the underlying packed searcher may not be able to search - /// very short haystacks. When that occurs, the implementation will defer - /// to a slower non-packed searcher (which is still generally faster than - /// Aho-Corasick for a small number of patterns). However, callers may - /// want to avoid ever using the slower variant, which one can do by - /// never passing a haystack shorter than the minimum length returned by - /// this method. - pub fn minimum_len(&self) -> usize { - self.minimum_len - } - - /// Returns the approximate total amount of heap used by this searcher, in - /// units of bytes. - pub fn heap_bytes(&self) -> usize { - self.patterns.heap_bytes() - + self.rabinkarp.heap_bytes() - + self.search_kind.heap_bytes() - } - - /// Use a slow (non-packed) searcher.
- /// - /// This is useful when a packed searcher could be constructed, but could - /// not be used to search a specific haystack. For example, if Teddy was - /// built but the haystack is smaller than ~34 bytes, then Teddy might not - /// be able to run. - fn slow_at(&self, haystack: &[u8], at: usize) -> Option<Match> { - self.rabinkarp.find_at(&self.patterns, haystack, at) - } -} - -impl SearchKind { - fn heap_bytes(&self) -> usize { - match *self { - SearchKind::Teddy(ref ted) => ted.heap_bytes(), - SearchKind::RabinKarp => 0, - } - } -} - -/// An iterator over non-overlapping matches from a packed searcher. -/// -/// The lifetime `'s` refers to the lifetime of the underlying -/// [`Searcher`](struct.Searcher.html), while the lifetime `'h` refers to the -/// lifetime of the haystack being searched. -#[derive(Debug)] -pub struct FindIter<'s, 'h> { - searcher: &'s Searcher, - haystack: &'h [u8], - at: usize, -} - -impl<'s, 'h> Iterator for FindIter<'s, 'h> { - type Item = Match; - - fn next(&mut self) -> Option<Match> { - if self.at > self.haystack.len() { - return None; - } - match self.searcher.find_at(&self.haystack, self.at) { - None => None, - Some(c) => { - self.at = c.end; - Some(c) - } - } - } -} diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/mod.rs libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/mod.rs --- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/mod.rs 2022-11-23 18:55:06.000000000 +0000 +++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/mod.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,117 +0,0 @@ -/*! -A lower level API for packed multiple substring search, principally for a small -number of patterns. - -This sub-module provides vectorized routines for quickly finding matches of a -small number of patterns. In general, users of this crate shouldn't need to -interface with this module directly, as the primary -[`AhoCorasick`](../struct.AhoCorasick.html) -searcher will use these routines automatically as a prefilter when applicable. -However, in some cases, callers may want to bypass the Aho-Corasick machinery -entirely and use this vectorized searcher directly. - -# Overview - -The primary types in this sub-module are: - -* [`Searcher`](struct.Searcher.html) executes the actual search algorithm to - report matches in a haystack. -* [`Builder`](struct.Builder.html) accumulates patterns incrementally and can - construct a `Searcher`. -* [`Config`](struct.Config.html) permits tuning the searcher, and itself will - produce a `Builder` (which can then be used to build a `Searcher`). - Currently, the only tuneable knob is the match semantics, but this may be - expanded in the future. - -# Examples - -This example shows how to create a searcher from an iterator of patterns. -By default, leftmost-first match semantics are used. (See the top-level -[`MatchKind`](../enum.MatchKind.html) type for more details about match -semantics, which apply similarly to packed substring search.)
- -``` -use aho_corasick::packed::{MatchKind, Searcher}; - -# fn example() -> Option<()> { -let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?; -let matches: Vec<usize> = searcher - .find_iter("foobar") - .map(|mat| mat.pattern()) - .collect(); -assert_eq!(vec![0], matches); -# Some(()) } -# if cfg!(target_arch = "x86_64") { -# example().unwrap() -# } else { -# assert!(example().is_none()); -# } -``` - -This example shows how to use [`Config`](struct.Config.html) to change the -match semantics to leftmost-longest: - -``` -use aho_corasick::packed::{Config, MatchKind}; - -# fn example() -> Option<()> { -let searcher = Config::new() - .match_kind(MatchKind::LeftmostLongest) - .builder() - .add("foo") - .add("foobar") - .build()?; -let matches: Vec<usize> = searcher - .find_iter("foobar") - .map(|mat| mat.pattern()) - .collect(); -assert_eq!(vec![1], matches); -# Some(()) } -# if cfg!(target_arch = "x86_64") { -# example().unwrap() -# } else { -# assert!(example().is_none()); -# } -``` - -# Packed substring searching - -Packed substring searching refers to the use of SIMD (Single Instruction, -Multiple Data) to accelerate the detection of matches in a haystack. Unlike -conventional algorithms, such as Aho-Corasick, SIMD algorithms for substring -search tend to do better with a small number of patterns, whereas Aho-Corasick -generally maintains reasonably consistent performance regardless of the number -of patterns you give it. Because of this, the vectorized searcher in this -sub-module cannot be used as a general purpose searcher, since building the -searcher may fail. However, in exchange, when searching for a small number of -patterns, searching can be quite a bit faster than Aho-Corasick (sometimes by -an order of magnitude). - -The key takeaway here is that constructing a searcher from a list of patterns -is a fallible operation. While the precise conditions under which building a -searcher can fail are specifically an implementation detail, here are some -common reasons: - -* Too many patterns were given. Typically, the limit is on the order of 100 or - so, but this limit may fluctuate based on available CPU features. -* The available packed algorithms require CPU features that aren't available. - For example, currently, this crate only provides packed algorithms for - `x86_64`. Therefore, constructing a packed searcher on any other target - (e.g., ARM) will always fail. -* Zero patterns were given, or one of the patterns given was empty. Packed - searchers require at least one pattern and that all patterns are non-empty. -* Something else about the nature of the patterns (typically based on - heuristics) suggests that a packed searcher would perform very poorly, so - no searcher is built. (A sketch of this fallibility in code follows this - list.)
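[Editor's note: the sketch promised in the last bullet. It shows, under the 0.7-era `packed::Builder` API removed by this diff, how fallible construction surfaces to callers; the outcome of the final build is CPU- and target-dependent, as described above.]

```rust
use aho_corasick::packed::Builder;

fn main() {
    // Zero patterns: no searcher is built.
    assert!(Builder::new().build().is_none());

    // An empty pattern renders the builder inert: no searcher is built.
    assert!(Builder::new().add("").add("foo").build().is_none());

    // A small set of non-empty patterns: Some(..) on x86_64 CPUs with the
    // required SIMD support, None elsewhere (e.g., ARM).
    let searcher = Builder::new().add("foo").add("bar").build();
    println!("packed searcher built: {}", searcher.is_some());
}
```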
-*/ - -pub use crate::packed::api::{Builder, Config, FindIter, MatchKind, Searcher}; - -mod api; -mod pattern; -mod rabinkarp; -mod teddy; -#[cfg(test)] -mod tests; -#[cfg(target_arch = "x86_64")] -mod vector; diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/pattern.rs libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/pattern.rs --- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/pattern.rs 2022-11-23 18:55:06.000000000 +0000 +++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/pattern.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,318 +0,0 @@ -use std::cmp; -use std::fmt; -use std::mem; -use std::u16; -use std::usize; - -use crate::packed::api::MatchKind; - -/// The type used for representing a pattern identifier. -/// -/// We don't use `usize` here because our packed searchers don't scale to -/// huge numbers of patterns, so we keep things a bit smaller. -pub type PatternID = u16; - -/// A non-empty collection of non-empty patterns to search for. -/// -/// This collection of patterns is what is passed around to both execute -/// searches and to construct the searchers themselves. Namely, this permits -/// searches to avoid copying all of the patterns, and allows us to keep only -/// one copy throughout all packed searchers. -/// -/// Note that this collection is not a set. The same pattern can appear more -/// than once. -#[derive(Clone, Debug)] -pub struct Patterns { - /// The match semantics supported by this collection of patterns. - /// - /// The match semantics determines the order of the iterator over patterns. - /// For leftmost-first, patterns are provided in the same order as they were - /// provided by the caller. For leftmost-longest, patterns are provided in - /// descending order of length, with ties broken by the order in which they - /// were provided by the caller. - kind: MatchKind, - /// The collection of patterns, indexed by their identifier. - by_id: Vec<Vec<u8>>, - /// The order of patterns defined for iteration, given by pattern - /// identifiers. The order of `by_id` and `order` is always the same for - /// leftmost-first semantics, but may be different for leftmost-longest - /// semantics. - order: Vec<PatternID>, - /// The length of the smallest pattern, in bytes. - minimum_len: usize, - /// The largest pattern identifier. This should always be equivalent to - /// the number of patterns minus one in this collection. - max_pattern_id: PatternID, - /// The total number of pattern bytes across the entire collection. This - /// is used for reporting total heap usage in constant time. - total_pattern_bytes: usize, -} - -impl Patterns { - /// Create a new collection of patterns for the given match semantics. The - /// ID of each pattern is the index of the pattern at which it occurs in - /// the `by_id` slice. - /// - /// If any of the patterns in the slice given are empty, then this panics. - /// Similarly, if the number of patterns given is zero, then this also - /// panics. - pub fn new() -> Patterns { - Patterns { - kind: MatchKind::default(), - by_id: vec![], - order: vec![], - minimum_len: usize::MAX, - max_pattern_id: 0, - total_pattern_bytes: 0, - } - } - - /// Add a pattern to this collection. - /// - /// This panics if the pattern given is empty.
- pub fn add(&mut self, bytes: &[u8]) { - assert!(!bytes.is_empty()); - assert!(self.by_id.len() <= u16::MAX as usize); - - let id = self.by_id.len() as u16; - self.max_pattern_id = id; - self.order.push(id); - self.by_id.push(bytes.to_vec()); - self.minimum_len = cmp::min(self.minimum_len, bytes.len()); - self.total_pattern_bytes += bytes.len(); - } - - /// Set the match kind semantics for this collection of patterns. - /// - /// If the kind is not set, then the default is leftmost-first. - pub fn set_match_kind(&mut self, kind: MatchKind) { - match kind { - MatchKind::LeftmostFirst => { - self.order.sort(); - } - MatchKind::LeftmostLongest => { - let (order, by_id) = (&mut self.order, &mut self.by_id); - order.sort_by(|&id1, &id2| { - by_id[id1 as usize] - .len() - .cmp(&by_id[id2 as usize].len()) - .reverse() - }); - } - MatchKind::__Nonexhaustive => unreachable!(), - } - } - - /// Return the number of patterns in this collection. - /// - /// This is guaranteed to be greater than zero. - pub fn len(&self) -> usize { - self.by_id.len() - } - - /// Returns true if and only if this collection of patterns is empty. - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - /// Returns the approximate total amount of heap used by these patterns, in - /// units of bytes. - pub fn heap_bytes(&self) -> usize { - self.order.len() * mem::size_of::<PatternID>() - + self.by_id.len() * mem::size_of::<Vec<u8>>() - + self.total_pattern_bytes - } - - /// Clears all heap memory associated with this collection of patterns and - /// resets all state such that it is a valid empty collection. - pub fn reset(&mut self) { - self.kind = MatchKind::default(); - self.by_id.clear(); - self.order.clear(); - self.minimum_len = usize::MAX; - self.max_pattern_id = 0; - } - - /// Return the maximum pattern identifier in this collection. This can be - /// useful in searchers for ensuring that the collection of patterns they - /// are provided at search time and at build time have the same size. - pub fn max_pattern_id(&self) -> PatternID { - assert_eq!((self.max_pattern_id + 1) as usize, self.len()); - self.max_pattern_id - } - - /// Returns the length, in bytes, of the smallest pattern. - /// - /// This is guaranteed to be at least one. - pub fn minimum_len(&self) -> usize { - self.minimum_len - } - - /// Returns the match semantics used by these patterns. - pub fn match_kind(&self) -> &MatchKind { - &self.kind - } - - /// Return the pattern with the given identifier. If such a pattern does - /// not exist, then this panics. - pub fn get(&self, id: PatternID) -> Pattern<'_> { - Pattern(&self.by_id[id as usize]) - } - - /// Return the pattern with the given identifier without performing bounds - /// checks. - /// - /// # Safety - /// - /// Callers must ensure that a pattern with the given identifier exists - /// before using this method. - #[cfg(target_arch = "x86_64")] - pub unsafe fn get_unchecked(&self, id: PatternID) -> Pattern<'_> { - Pattern(self.by_id.get_unchecked(id as usize)) - } - - /// Return an iterator over all the patterns in this collection, in the - /// order in which they should be matched.
- /// - /// Specifically, in a naive multi-pattern matcher, the following is - /// guaranteed to satisfy the match semantics of this collection of - /// patterns: - /// - /// ```ignore - /// for i in 0..haystack.len(): - /// for p in patterns.iter(): - /// if haystack[i..].starts_with(p.bytes()): - /// return Match(p.id(), i, i + p.bytes().len()) - /// ``` - /// - /// Namely, among the patterns in a collection, if they are matched in - /// the order provided by this iterator, then the result is guaranteed - /// to satisfy the correct match semantics. (Either leftmost-first or - /// leftmost-longest.) - pub fn iter(&self) -> PatternIter<'_> { - PatternIter { patterns: self, i: 0 } - } -} - -/// An iterator over the patterns in the `Patterns` collection. -/// -/// The order of the patterns provided by this iterator is consistent with the -/// match semantics of the originating collection of patterns. -/// -/// The lifetime `'p` corresponds to the lifetime of the collection of patterns -/// this is iterating over. -#[derive(Debug)] -pub struct PatternIter<'p> { - patterns: &'p Patterns, - i: usize, -} - -impl<'p> Iterator for PatternIter<'p> { - type Item = (PatternID, Pattern<'p>); - - fn next(&mut self) -> Option<(PatternID, Pattern<'p>)> { - if self.i >= self.patterns.len() { - return None; - } - let id = self.patterns.order[self.i]; - let p = self.patterns.get(id); - self.i += 1; - Some((id, p)) - } -} - -/// A pattern that is used in packed searching. -#[derive(Clone)] -pub struct Pattern<'a>(&'a [u8]); - -impl<'a> fmt::Debug for Pattern<'a> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("Pattern") - .field("lit", &String::from_utf8_lossy(&self.0)) - .finish() - } -} - -impl<'p> Pattern<'p> { - /// Returns the length of this pattern, in bytes. - pub fn len(&self) -> usize { - self.0.len() - } - - /// Returns the bytes of this pattern. - pub fn bytes(&self) -> &[u8] { - &self.0 - } - - /// Returns the first `len` low nybbles from this pattern. If this pattern - /// is shorter than `len`, then this panics. - #[cfg(target_arch = "x86_64")] - pub fn low_nybbles(&self, len: usize) -> Vec<u8> { - let mut nybs = vec![]; - for &b in self.bytes().iter().take(len) { - nybs.push(b & 0xF); - } - nybs - } - - /// Returns true if this pattern is a prefix of the given bytes. - #[inline(always)] - pub fn is_prefix(&self, bytes: &[u8]) -> bool { - self.len() <= bytes.len() && self.equals(&bytes[..self.len()]) - } - - /// Returns true if and only if this pattern equals the given bytes. - #[inline(always)] - pub fn equals(&self, bytes: &[u8]) -> bool { - // Why not just use memcmp for this? Well, memcmp requires calling out - // to libc, and this routine is called in fairly hot code paths. Other - // than just calling out to libc, it also seems to result in worse - // codegen. By rolling our own memcmp in pure Rust, it seems to appear - // more friendly to the optimizer. - // - // This results in an improvement in just about every benchmark. Some - // smaller than others, but in some cases, up to 30% faster. - - if self.len() != bytes.len() { - return false; - } - if self.len() < 8 { - for (&b1, &b2) in self.bytes().iter().zip(bytes) { - if b1 != b2 { - return false; - } - } - return true; - } - // When we have 8 or more bytes to compare, then proceed in chunks of - // 8 at a time using unaligned loads.
- let mut p1 = self.bytes().as_ptr(); - let mut p2 = bytes.as_ptr(); - let p1end = self.bytes()[self.len() - 8..].as_ptr(); - let p2end = bytes[bytes.len() - 8..].as_ptr(); - // SAFETY: Via the conditional above, we know that both `p1` and `p2` - // have the same length, so `p1 < p1end` implies that `p2 < p2end`. - // Thus, dereferencing both `p1` and `p2` in the loop below is safe. - // - // Moreover, we set `p1end` and `p2end` to be 8 bytes before the actual - // end of `p1` and `p2`. Thus, the final dereference outside of the - // loop is guaranteed to be valid. - // - // Finally, we needn't worry about 64-bit alignment here, since we - // do unaligned loads. - unsafe { - while p1 < p1end { - let v1 = (p1 as *const u64).read_unaligned(); - let v2 = (p2 as *const u64).read_unaligned(); - if v1 != v2 { - return false; - } - p1 = p1.add(8); - p2 = p2.add(8); - } - let v1 = (p1end as *const u64).read_unaligned(); - let v2 = (p2end as *const u64).read_unaligned(); - v1 == v2 - } - } -} diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/rabinkarp.rs libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/rabinkarp.rs --- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/rabinkarp.rs 2022-11-23 18:55:06.000000000 +0000 +++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/rabinkarp.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,185 +0,0 @@ -use std::mem; - -use crate::packed::pattern::{PatternID, Patterns}; -use crate::Match; - -/// The type of the rolling hash used in the Rabin-Karp algorithm. -type Hash = usize; - -/// The number of buckets to store our patterns in. We don't want this to be -/// too big in order to avoid wasting memory, but we don't want it to be too -/// small either to avoid spending too much time confirming literals. -/// -/// The number of buckets MUST be a power of two. Otherwise, determining the -/// bucket from a hash will slow down the code considerably. Using a power -/// of two means `hash % NUM_BUCKETS` can compile down to a simple `and` -/// instruction. -const NUM_BUCKETS: usize = 64; - -/// An implementation of the Rabin-Karp algorithm. The main idea of this -/// algorithm is to maintain a rolling hash as it moves through the input, and -/// then check whether that hash corresponds to the same hash for any of the -/// patterns we're looking for. -/// -/// A drawback of naively scaling Rabin-Karp to multiple patterns is that -/// it requires all of the patterns to be the same length, which in turn -/// corresponds to the number of bytes to hash. We adapt this to work for -/// multiple patterns of varying size by fixing the number of bytes to hash -/// to be the length of the smallest pattern. We also split the patterns into -/// several buckets to hopefully make the confirmation step faster. -/// -/// Wikipedia has a decent explanation, if a bit heavy on the theory: -/// https://en.wikipedia.org/wiki/Rabin%E2%80%93Karp_algorithm -/// -/// But ESMAJ provides something a bit more concrete: -/// https://www-igm.univ-mlv.fr/~lecroq/string/node5.html -#[derive(Clone, Debug)] -pub struct RabinKarp { - /// The order of patterns in each bucket is significant. Namely, they are - /// arranged such that the first one to match is the correct match. This - /// may not necessarily correspond to the order provided by the caller. - /// For example, if leftmost-longest semantics are used, then the patterns - /// are sorted by their length in descending order.
If leftmost-first - /// semantics are used, then the patterns are sorted by their pattern ID - /// in ascending order (which corresponds to the caller's order). - buckets: Vec<Vec<(Hash, PatternID)>>, - /// The length of the hashing window. Generally, this corresponds to the - /// length of the smallest pattern. - hash_len: usize, - /// The factor to subtract out of a hash before updating it with a new - /// byte. - hash_2pow: usize, - /// The maximum identifier of a pattern. This is used as a sanity check - /// to ensure that the patterns provided by the caller are the same as - /// the patterns that were used to compile the matcher. This sanity check - /// possibly permits safely eliminating bounds checks regardless of what - /// patterns are provided by the caller. - /// - /// (Currently, we don't use this to elide bounds checks since it doesn't - /// result in a measurable performance improvement, but we do use it for - /// better failure modes.) - max_pattern_id: PatternID, -} - -impl RabinKarp { - /// Compile a new Rabin-Karp matcher from the patterns given. - /// - /// This panics if any of the patterns in the collection are empty, or if - /// the collection is itself empty. - pub fn new(patterns: &Patterns) -> RabinKarp { - assert!(patterns.len() >= 1); - let hash_len = patterns.minimum_len(); - assert!(hash_len >= 1); - - let mut hash_2pow = 1usize; - for _ in 1..hash_len { - hash_2pow = hash_2pow.wrapping_shl(1); - } - - let mut rk = RabinKarp { - buckets: vec![vec![]; NUM_BUCKETS], - hash_len, - hash_2pow, - max_pattern_id: patterns.max_pattern_id(), - }; - for (id, pat) in patterns.iter() { - let hash = rk.hash(&pat.bytes()[..rk.hash_len]); - let bucket = hash % NUM_BUCKETS; - rk.buckets[bucket].push((hash, id)); - } - rk - } - - /// Return the first matching pattern in the given haystack, beginning the - /// search at `at`. - pub fn find_at( - &self, - patterns: &Patterns, - haystack: &[u8], - mut at: usize, - ) -> Option<Match> { - assert_eq!(NUM_BUCKETS, self.buckets.len()); - assert_eq!( - self.max_pattern_id, - patterns.max_pattern_id(), - "Rabin-Karp must be called with same patterns it was built with", - ); - - if at + self.hash_len > haystack.len() { - return None; - } - let mut hash = self.hash(&haystack[at..at + self.hash_len]); - loop { - let bucket = &self.buckets[hash % NUM_BUCKETS]; - for &(phash, pid) in bucket { - if phash == hash { - if let Some(c) = self.verify(patterns, pid, haystack, at) { - return Some(c); - } - } - } - if at + self.hash_len >= haystack.len() { - return None; - } - hash = self.update_hash( - hash, - haystack[at], - haystack[at + self.hash_len], - ); - at += 1; - } - } - - /// Returns the approximate total amount of heap used by this searcher, in - /// units of bytes. - pub fn heap_bytes(&self) -> usize { - let num_patterns = self.max_pattern_id as usize + 1; - self.buckets.len() * mem::size_of::<Vec<(Hash, PatternID)>>() - + num_patterns * mem::size_of::<(Hash, PatternID)>() - } - - /// Verify whether the pattern with the given id matches at - /// `haystack[at..]`. - /// - /// We tag this function as `cold` because it helps improve codegen. - /// Intuitively, it would seem like inlining it would be better. However, - /// the only time this is called and a match is not found is when there - /// is a hash collision, or when a prefix of a pattern matches but - /// the entire pattern doesn't match. This is hopefully fairly rare, and - /// if it does occur a lot, it's going to be slow no matter what we do.
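[Editor's note: before the `verify` routine below, a quick self-contained check of the rolling-hash arithmetic used by `find_at` above. The `hash` body and the `hash_2pow` factor mirror the definitions in this file; the driver and its values are invented for the sketch.]

```rust
// hash(b[0..n]) = b[0]*2^(n-1) + b[1]*2^(n-2) + ... + b[n-1], wrapping.
fn hash(bytes: &[u8]) -> usize {
    let mut h = 0usize;
    for &b in bytes {
        h = h.wrapping_shl(1).wrapping_add(b as usize);
    }
    h
}

fn main() {
    let haystack = b"abcdef";
    let n = 3; // hashing window, i.e. the length of the smallest pattern
    let hash_2pow = 1usize << (n - 1); // factor of the byte leaving the window
    let mut h = hash(&haystack[0..n]);
    for at in 1..=haystack.len() - n {
        // Roll the hash: subtract out haystack[at-1], shift, then add the
        // byte entering the window. This is exactly `update_hash`.
        h = h
            .wrapping_sub((haystack[at - 1] as usize).wrapping_mul(hash_2pow))
            .wrapping_shl(1)
            .wrapping_add(haystack[at + n - 1] as usize);
        assert_eq!(h, hash(&haystack[at..at + n]));
    }
    println!("rolling hash agrees with rehashing for every window");
}
```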
- #[cold] - fn verify( - &self, - patterns: &Patterns, - id: PatternID, - haystack: &[u8], - at: usize, - ) -> Option<Match> { - let pat = patterns.get(id); - if pat.is_prefix(&haystack[at..]) { - Some(Match::from_span(id as usize, at, at + pat.len())) - } else { - None - } - } - - /// Hash the given bytes. - fn hash(&self, bytes: &[u8]) -> Hash { - assert_eq!(self.hash_len, bytes.len()); - - let mut hash = 0usize; - for &b in bytes { - hash = hash.wrapping_shl(1).wrapping_add(b as usize); - } - hash - } - - /// Update the hash given based on removing `old_byte` at the beginning - /// of some byte string, and appending `new_byte` to the end of that same - /// byte string. - fn update_hash(&self, prev: Hash, old_byte: u8, new_byte: u8) -> Hash { - prev.wrapping_sub((old_byte as usize).wrapping_mul(self.hash_2pow)) - .wrapping_shl(1) - .wrapping_add(new_byte as usize) - } -} diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/teddy/README.md libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/teddy/README.md --- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/teddy/README.md 2022-11-23 18:55:06.000000000 +0000 +++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/teddy/README.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,386 +0,0 @@ -Teddy is a SIMD accelerated multiple substring matching algorithm. The name -and the core ideas in the algorithm were learned from the [Hyperscan][1_u] -project. The implementation in this repository was mostly motivated for use in -accelerating regex searches by searching for small sets of required literals -extracted from the regex. - - -# Background - -The key idea of Teddy is to do *packed* substring matching. In the literature, -packed substring matching is the idea of examining multiple bytes in a haystack -at a time to detect matches. Implementations of, for example, memchr (which -detects matches of a single byte) have been doing this for years. Only -recently, with the introduction of various SIMD instructions, has this been -extended to substring matching. The PCMPESTRI instruction (and its relatives), -for example, implements substring matching in hardware. It is, however, limited -to substrings of length 16 bytes or fewer, but this restriction is fine in a -regex engine, since we rarely care about the performance difference between -searching for a 16 byte literal and a 16 + N literal; 16 is already long -enough. The key downside of the PCMPESTRI instruction, on current (2016) CPUs -at least, is its latency and throughput. As a result, it is often faster to -do substring search with a Boyer-Moore (or Two-Way) variant and a well placed -memchr to quickly skip through the haystack. - -There are fewer results from the literature on packed substring matching, -and even fewer for packed multiple substring matching. Ben-Kiki et al. [2] -describes use of PCMPESTRI for substring matching, but is mostly theoretical -and hand-waves performance. There is other theoretical work done by Bille [3] -as well. - -The rest of the work in the field, as far as I'm aware, is by Faro and Kulekci -and is generally focused on multiple pattern search. Their first paper [4a] -introduces the concept of a fingerprint, which is computed for every block of -N bytes in every pattern. The haystack is then scanned N bytes at a time and -a fingerprint is computed in the same way it was computed for blocks in the -patterns.
If the fingerprint corresponds to one that was found in a pattern, -then a verification step follows to confirm that one of the substrings with the -corresponding fingerprint actually matches at the current location. Various -implementation tricks are employed to make sure the fingerprint lookup is fast; -typically by truncating the fingerprint. (This may, of course, provoke more -steps in the verification process, so a balance must be struck.) - -The main downside of [4a] is that the minimum substring length is 32 bytes, -presumably because of how the algorithm uses certain SIMD instructions. This -essentially makes it useless for general purpose regex matching, where a small -number of short patterns is far more likely. - -Faro and Kulekci published another paper [4b] that is conceptually very similar -to [4a]. The key difference is that it uses the CRC32 instruction (introduced -as part of SSE 4.2) to compute fingerprint values. This also enables the -algorithm to work effectively on substrings as short as 7 bytes with 4 byte -windows. 7 bytes is unfortunately still too long. The window could be -technically shrunk to 2 bytes, thereby reducing minimum length to 3, but the -small window size ends up negating most of the performance benefits, and short -patterns are likely the common case in a general purpose regex engine. - -Faro and Kulekci also published [4c] that appears to be intended as a -replacement for using PCMPESTRI. In particular, it is specifically motivated by -the high throughput/latency time of PCMPESTRI and therefore chooses other SIMD -instructions that are faster. While this approach works for short substrings, -I personally couldn't see a way to generalize it to multiple substring search. - -Faro and Kulekci have another paper [4d] that I haven't been able to read -because it is behind a paywall. - - -# Teddy - -Finally, we get to Teddy. If the above literature review is complete, then it -appears that Teddy is a novel algorithm. More than that, in my experience, it -completely blows away the competition for short substrings, which is exactly -what we want in a general purpose regex engine. Again, the algorithm appears -to have been developed by the authors of [Hyperscan][1_u]. Hyperscan was open -sourced late 2015, and no earlier history could be found. Therefore, tracking -the exact provenance of the algorithm with respect to the published literature -seems difficult. - -At a high level, Teddy works somewhat similarly to the fingerprint algorithms -published by Faro and Kulekci, but Teddy does it in a way that scales a bit -better. Namely: - -1. Teddy's core algorithm scans the haystack in 16 (for SSE, or 32 for AVX) - byte chunks. 16 (or 32) is significant because it corresponds to the number - of bytes in a SIMD vector. -2. Bitwise operations are performed on each chunk to discover if any region of - it matches a set of precomputed fingerprints from the patterns. If there are - matches, then a verification step is performed. In this implementation, our - verification step is naive. This can be improved upon. - -The details to make this work are quite clever. First, we must choose how to -pick our fingerprints. In Hyperscan's implementation, I *believe* they use the -last N bytes of each substring, where N must be at least the minimum length of -any substring in the set being searched. In this implementation, we use the -first N bytes of each substring. (The tradeoffs between these choices aren't -yet clear to me.)
We then must figure out how to quickly test whether an -occurrence of any fingerprint from the set of patterns appears in a 16 byte -block from the haystack. To keep things simple, let's assume N = 1 and examine -some examples to motivate the approach. Here are our patterns: - -```ignore -foo -bar -baz -``` - -The corresponding fingerprints, for N = 1, are `f`, `b` and `b`. Now let's set -our 16 byte block to: - -```ignore -bat cat foo bump -xxxxxxxxxxxxxxxx -``` - -To cut to the chase, Teddy works by using bitsets. In particular, Teddy creates -a mask that allows us to quickly compute membership of a fingerprint in a 16 -byte block that also tells which pattern the fingerprint corresponds to. In -this case, our fingerprint is a single byte, so an appropriate abstraction is -a map from a single byte to a list of patterns that contain that fingerprint: - -```ignore -f |--> foo -b |--> bar, baz -``` - -Now, all we need to do is figure out how to represent this map in vector space -and use normal SIMD operations to perform a lookup. The first simplification -we can make is to represent our patterns as bit fields occupying a single -byte. This is important, because a single SIMD vector can store 16 bytes. - -```ignore -f |--> 00000001 -b |--> 00000010, 00000100 -``` - -How do we perform lookup though? It turns out that SSSE3 introduced a very cool -instruction called PSHUFB. The instruction takes two SIMD vectors, `A` and `B`, -and returns a third vector `C`. All vectors are treated as 16 8-bit integers. -`C` is formed by `C[i] = A[B[i]]`. (This is a bit of a simplification, but true -for the purposes of this algorithm. For full details, see [Intel's Intrinsics -Guide][5_u].) This essentially lets us use the values in `B` to lookup values -in `A`. - -If we could somehow cause `B` to contain our 16 byte block from the haystack, -and if `A` could contain our bitmasks, then we'd end up with something like -this for `A`: - -```ignore - 0x00 0x01 ... 0x62 ... 0x66 ... 0xFF -A = 0 0 00000110 00000001 0 -``` - -And if `B` contains our window from our haystack, we could use shuffle to take -the values from `B` and use them to look up our bitsets in `A`. But of course, -we can't do this because `A` in the above example contains 256 bytes, which -is much larger than the size of a SIMD vector. - -Nybbles to the rescue! A nybble is 4 bits. Instead of one mask to hold all of -our bitsets, we can use two masks, where one mask corresponds to the lower four -bits of our fingerprint and the other mask corresponds to the upper four bits. -So our map now looks like: - -```ignore -'f' & 0xF = 0x6 |--> 00000001 -'f' >> 4 = 0x6 |--> 00000111 -'b' & 0xF = 0x2 |--> 00000110 -'b' >> 4 = 0x6 |--> 00000111 -``` - -Notice that the bitsets for each nybble correspond to the union of all -fingerprints that contain that nybble. For example, both `f` and `b` have the -same upper 4 bits but differ on the lower 4 bits. Putting this together, we -have `A0`, `A1` and `B`, where `A0` is our mask for the lower nybble, `A1` is -our mask for the upper nybble and `B` is our 16 byte block from the haystack: - -```ignore - 0x00 0x01 0x02 0x03 ... 0x06 ... 0xF -A0 = 0 0 00000110 0 00000001 0 -A1 = 0 0 0 0 00000111 0 -B = b a t _ t p -B = 0x62 0x61 0x74 0x20 0x74 0x70 -``` - -But of course, we can't use `B` with `PSHUFB` yet, since its values are 8 bits, -and we need indexes that are at most 4 bits (corresponding to one of 16 -values). We can apply the same transformation to split `B` into lower and upper -nybbles as we did `A`. 
As before, `B0` corresponds to the lower nybbles and
-`B1` corresponds to the upper nybbles:
-
-```ignore
-      b   a   t   _   c   a   t   _   f   o   o   _   b   u   m   p
-B0 = 0x2 0x1 0x4 0x0 0x3 0x1 0x4 0x0 0x6 0xF 0xF 0x0 0x2 0x5 0xD 0x0
-B1 = 0x6 0x6 0x7 0x2 0x6 0x6 0x7 0x2 0x6 0x6 0x6 0x2 0x6 0x7 0x6 0x7
-```
-
-And now we have a nice correspondence. `B0` can index `A0` and `B1` can index
-`A1`. Here's what we get when we apply `C0 = PSHUFB(A0, B0)`:
-
-```ignore
-        b         a     ...     f         o     ...     p
-     A0[0x2]   A0[0x1]       A0[0x6]   A0[0xF]       A0[0x0]
-C0 = 00000110         0      00000001         0            0
-```
-
-And `C1 = PSHUFB(A1, B1)`:
-
-```ignore
-        b         a     ...     f         o     ...     p
-     A1[0x6]   A1[0x6]       A1[0x6]   A1[0x6]       A1[0x7]
-C1 = 00000111  00000111      00000111  00000111            0
-```
-
-Notice how neither `C0` nor `C1` is guaranteed to report fully correct
-results all on its own. For example, `C1` claims that `b` is a fingerprint for
-the pattern `foo` (since `A1[0x6] = 00000111`), and that `o` is a fingerprint
-for all of our patterns. But if we combine `C0` and `C1` with an `AND`
-operation:
-
-```ignore
-        b         a     ...     f         o     ...     p
-C  = 00000110         0      00000001         0            0
-```
-
-Then we have that `C[i]` contains a bitset corresponding to the matching
-fingerprints in a haystack's 16 byte block, where `i` is the `ith` byte in that
-block.
-
-Once we have that, we can look for the position of the least significant bit
-in `C`. (Least significant because we only target `x86_64` here, which is
-always little endian. Thus, the least significant bytes correspond to bytes
-in our haystack at a lower address.) That position, modulo `8`, gives us
-the pattern that the fingerprint matches. That position, integer divided by
-`8`, also gives us the byte offset at which the fingerprint occurs inside the
-16 byte haystack block. Using those two pieces of information, we can run a
-verification procedure that tries to match all substrings containing that
-fingerprint at that position in the haystack.
-
-
-# Implementation notes
-
-The problem with the algorithm as described above is that it uses a single byte
-for a fingerprint. This will work well if the fingerprints are rare in the
-haystack (e.g., capital letters or special characters in normal English text),
-but if the fingerprints are common, you'll wind up spending too much time in
-the verification step, which effectively negates the performance benefits of
-scanning 16 bytes at a time. Remember, the key to the performance of this
-algorithm is to do as little work as possible per 16 (or 32) bytes.
-
-This algorithm can be extrapolated in a relatively straightforward way to use
-larger fingerprints. That is, instead of a single byte prefix, we might use a
-two or three byte prefix. The implementation here implements N = {1, 2, 3}
-and always picks the largest N possible. The rationale is that the bigger the
-fingerprint, the fewer verification steps we'll do. Of course, if N is too
-large, then we'll end up doing too much work on each step.
-
-The way to extend it is:
-
-1. Add a mask for each byte in the fingerprint. (Remember that each mask is
-   composed of two SIMD vectors.) This results in a value of `C` for each byte
-   in the fingerprint while searching.
-2. When testing each 16 (or 32) byte block, each value of `C` must be shifted
-   so that they are aligned. Once aligned, they should all be `AND`'d together.
-   This will give you only the bitsets corresponding to the full match of the
-   fingerprint. To do this, one needs to save the last byte (for N=2) or last
-   two bytes (for N=3) from the previous iteration, and then line them up with
-   the first one or two bytes of the next iteration. (A scalar sketch of this
-   carry follows the list.)
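A scalar sketch of the carry for N = 2 may help (hypothetical helper; the SSE code below achieves the same effect with `_mm_alignr_epi8(res0, prev0, 15)`). A fingerprint match ending at lane `j` needs the first-byte hit from lane `j - 1`, which for `j = 0` lives in the previous chunk:

```rust
/// Combine first-byte hits (res0) and second-byte hits (res1) into
/// candidates for a 2-byte fingerprint, carrying res0's last lane
/// across chunk boundaries. (Illustrative sketch only.)
fn candidates_n2(
    res0: &[u8; 16],
    res1: &[u8; 16],
    prev0_last: &mut u8,
) -> [u8; 16] {
    let mut c = [0u8; 16];
    for j in 0..16 {
        let first = if j == 0 { *prev0_last } else { res0[j - 1] };
        c[j] = first & res1[j];
    }
    *prev0_last = res0[15];
    c
}

fn main() {
    // An all-ones carry is a permissive start (like the `ones128()`
    // initialization in the runtime below); verification rejects any
    // false positives it lets through.
    let mut carry = 0xFF;
    let (res0, mut res1) = ([0u8; 16], [0u8; 16]);
    res1[0] = 0b0000_0001;
    let c = candidates_n2(&res0, &res1, &mut carry);
    assert_eq!(c[0], 0b0000_0001);
    assert_eq!(carry, 0);
}
```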
-
-## Verification
-
-Verification generally follows the procedure outlined above. The tricky parts
-are in the right formulation of operations to get our bits out of our vectors.
-We have a limited set of operations available to us on SIMD vectors as 128-bit
-or 256-bit numbers, so we wind up needing to rip out 2 (or 4) 64-bit integers
-from our vectors, and then run our verification step on each of those. The
-verification step looks at the least significant bit set, and from its
-position, we can derive the byte offset and bucket. (Again, as described
-above.) Once we know the bucket, we do a fairly naive exhaustive search for
-every literal in that bucket. (Hyperscan is a bit smarter here and uses a hash
-table, but I haven't had time to thoroughly explore that. A few initial
-half-hearted attempts resulted in worse performance.)
-
-## AVX
-
-The AVX version of Teddy extrapolates almost perfectly from the SSE version.
-The only hiccup is that PALIGNR is used to align chunks in the 128-bit
-version, and there is no full-width equivalent instruction in AVX. AVX does
-have VPALIGNR, but it only works within 128-bit lanes. So there's a bit of
-tomfoolery to get around this by shuffling the vectors before calling
-VPALIGNR.
-
-The only other aspect to AVX is that since our masks are still fundamentally
-16 bytes (0x0-0xF), they are duplicated to 32 bytes, so that they can apply to
-32-byte chunks.
-
-## Fat Teddy
-
-In the version of Teddy described above, 8 buckets are used to group patterns
-that we want to search for. However, when AVX is available, we can extend the
-number of buckets to 16 by permitting each byte in our masks to use 16 bits
-instead of 8 bits to represent the buckets it belongs to. (This variant is also
-in Hyperscan.) However, what we give up is the ability to scan 32 bytes at a
-time, even though we're using AVX. Instead, we have to scan 16 bytes at a time.
-What we gain, though, is (hopefully) less work in our verification routine.
-If patterns are spread out across more buckets, then there should overall
-be fewer false positives. In general, Fat Teddy permits us to grow our capacity
-a bit and search for more literals before Teddy gets overwhelmed.
-
-The tricky part of Fat Teddy is in how we adjust our masks and our verification
-procedure. For the masks, we simply represent the first 8 buckets in each of
-the low 16 bytes, and then the second 8 buckets in each of the high 16 bytes.
-Then, in the search loop, instead of loading 32 bytes from the haystack, we
-load the same 16 bytes from the haystack into both the low and high 16 byte
-portions of our 256-bit vector. So for example, a mask might look like this:
-
-    bits:    00100001 00000000 ... 11000000 00000000 00000001 ... 00000000
-    byte:          31       30           16       15       14            0
-    offset:        15       14            0       15       14            0
-    buckets:     8-15     8-15         8-15      0-7      0-7          0-7
-
-Where `byte` is the position in the vector (higher numbers corresponding to
-more significant bits), `offset` is the corresponding position in the haystack
-chunk, and `buckets` corresponds to the bucket assignments for that particular
-byte.
-
-In particular, notice that the bucket assignments for offset `0` are spread
-out between bytes `0` and `16`. This works well for the chunk-by-chunk search
-procedure, but verification really wants to process all bucket assignments for
-each offset at once. Otherwise, we might wind up finding a match at offset
-`1` in one of the first 8 buckets, when we really should have reported a match
-at offset `0` in one of the second 8 buckets. (Because we want the leftmost
-match.)
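The decoding of a candidate into (offset, bucket) pairs is easiest to see in scalar form. The sketch below (my names, not the crate's) mirrors the `verify64` routine in the runtime source later in this diff: each lane of the candidate occupies `bucket_count` bits, so the position of a set bit encodes both the haystack offset and the bucket:

```rust
/// Decode the set bits of a candidate into (byte offset, bucket)
/// pairs, in leftmost-first order. (Illustrative sketch of the
/// `verify64` routine below.)
fn decode_candidates(mut cand: u64, bucket_count: usize) -> Vec<(usize, usize)> {
    let mut hits = Vec::new();
    while cand != 0 {
        let bit = cand.trailing_zeros() as usize;
        cand &= !(1 << bit); // clear the bit we just processed
        hits.push((bit / bucket_count, bit % bucket_count));
    }
    hits
}

fn main() {
    // Slim Teddy: 8 buckets per lane. Bit 1 => offset 0, bucket 1;
    // bit 16 => offset 2, bucket 0.
    let cand = (1u64 << 1) | (1u64 << 16);
    assert_eq!(decode_candidates(cand, 8), vec![(0, 1), (2, 0)]);
    // Fat Teddy treats each lane as 16 bits instead.
    assert_eq!(decode_candidates(cand, 16), vec![(0, 1), (1, 0)]);
}
```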
-
-Thus, for verification, we rearrange the above vector such that it is a
-sequence of 16-bit integers, where the least significant 16-bit integer
-corresponds to all of the bucket assignments for offset `0`. So with the
-above vector, the least significant 16-bit integer would be
-
-    11000000 00000000
-
-which was taken from bytes `16` and `0`. Then the verification step pretty much
-runs as described, except with 16 buckets instead of 8.
-
-
-# References
-
-- **[1]** [Hyperscan on GitHub](https://github.com/intel/hyperscan),
-  [webpage](https://www.hyperscan.io/)
-- **[2a]** Ben-Kiki, O., Bille, P., Breslauer, D., Gasieniec, L., Grossi, R.,
-  & Weimann, O. (2011).
-  _Optimal packed string matching_.
-  In LIPIcs-Leibniz International Proceedings in Informatics (Vol. 13).
-  Schloss Dagstuhl-Leibniz-Zentrum fuer Informatik.
-  DOI: 10.4230/LIPIcs.FSTTCS.2011.423.
-  [PDF](https://drops.dagstuhl.de/opus/volltexte/2011/3355/pdf/37.pdf).
-- **[2b]** Ben-Kiki, O., Bille, P., Breslauer, D., Ga̧sieniec, L., Grossi, R.,
-  & Weimann, O. (2014).
-  _Towards optimal packed string matching_.
-  Theoretical Computer Science, 525, 111-129.
-  DOI: 10.1016/j.tcs.2013.06.013.
-  [PDF](https://www.cs.haifa.ac.il/~oren/Publications/bpsm.pdf).
-- **[3]** Bille, P. (2011).
-  _Fast searching in packed strings_.
-  Journal of Discrete Algorithms, 9(1), 49-56.
-  DOI: 10.1016/j.jda.2010.09.003.
-  [PDF](https://www.sciencedirect.com/science/article/pii/S1570866710000353).
-- **[4a]** Faro, S., & Külekci, M. O. (2012, October).
-  _Fast multiple string matching using streaming SIMD extensions technology_.
-  In String Processing and Information Retrieval (pp. 217-228).
-  Springer Berlin Heidelberg.
-  DOI: 10.1007/978-3-642-34109-0_23.
-  [PDF](https://www.dmi.unict.it/faro/papers/conference/faro32.pdf).
-- **[4b]** Faro, S., & Külekci, M. O. (2013, September).
-  _Towards a Very Fast Multiple String Matching Algorithm for Short Patterns_.
-  In Stringology (pp. 78-91).
-  [PDF](https://www.dmi.unict.it/faro/papers/conference/faro36.pdf).
-- **[4c]** Faro, S., & Külekci, M. O. (2013, January).
-  _Fast packed string matching for short patterns_.
-  In Proceedings of the Meeting on Algorithm Engineering & Experiments
-  (pp. 113-121).
-  Society for Industrial and Applied Mathematics.
-  [PDF](https://arxiv.org/pdf/1209.6449.pdf).
-- **[4d]** Faro, S., & Külekci, M. O. (2014).
-  _Fast and flexible packed string matching_.
-  Journal of Discrete Algorithms, 28, 61-72.
-  DOI: 10.1016/j.jda.2014.07.003.
-
-[1_u]: https://github.com/intel/hyperscan
-[5_u]: https://software.intel.com/sites/landingpage/IntrinsicsGuide
diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/teddy/compile.rs libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/teddy/compile.rs
--- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/teddy/compile.rs 2022-11-23 18:55:06.000000000 +0000
+++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/teddy/compile.rs 1970-01-01 00:00:00.000000000 +0000
@@ -1,414 +0,0 @@
-// See the README in this directory for an explanation of the Teddy algorithm.
-
-use std::cmp;
-use std::collections::BTreeMap;
-use std::fmt;
-
-use crate::packed::pattern::{PatternID, Patterns};
-use crate::packed::teddy::Teddy;
-
-/// A builder for constructing a Teddy matcher.
-///
-/// The builder primarily permits fine-grained configuration of the Teddy
-/// matcher. Most options are made only available for testing/benchmarking
-/// purposes. In reality, options are automatically determined by the nature
-/// and number of patterns given to the builder.
-#[derive(Clone, Debug)]
-pub struct Builder {
-    /// When none, this is automatically determined. Otherwise, `false` means
-    /// slim Teddy is used (8 buckets) and `true` means fat Teddy is used
-    /// (16 buckets). Fat Teddy requires AVX2, so if that CPU feature isn't
-    /// available and Fat Teddy was requested, no matcher will be built.
-    fat: Option<bool>,
-    /// When none, this is automatically determined. Otherwise, `false` means
-    /// that 128-bit vectors will be used (up to SSSE3 instructions) whereas
-    /// `true` means that 256-bit vectors will be used. As with `fat`, if
-    /// 256-bit vectors are requested and they aren't available, then a
-    /// searcher will not be built.
-    avx: Option<bool>,
-}
-
-impl Default for Builder {
-    fn default() -> Builder {
-        Builder::new()
-    }
-}
-
-impl Builder {
-    /// Create a new builder for configuring a Teddy matcher.
-    pub fn new() -> Builder {
-        Builder { fat: None, avx: None }
-    }
-
-    /// Build a matcher for the set of patterns given. If a matcher could not
-    /// be built, then `None` is returned.
-    ///
-    /// Generally, a matcher isn't built if the necessary CPU features aren't
-    /// available, the target is unsupported, or if the searcher is believed
-    /// to be slower than standard techniques (i.e., if there are too many
-    /// literals).
-    pub fn build(&self, patterns: &Patterns) -> Option<Teddy> {
-        self.build_imp(patterns)
-    }
-
-    /// Require the use of Fat (true) or Slim (false) Teddy. Fat Teddy uses
-    /// 16 buckets whereas Slim Teddy uses 8 buckets. More buckets are useful
-    /// for a larger set of literals.
-    ///
-    /// `None` is the default, which results in an automatic selection based
-    /// on the number of literals and available CPU features.
-    pub fn fat(&mut self, yes: Option<bool>) -> &mut Builder {
-        self.fat = yes;
-        self
-    }
-
-    /// Request the use of 256-bit vectors (true) or 128-bit vectors (false).
-    /// Generally, a larger vector size is better since it either permits
-    /// matching more patterns or matching more bytes in the haystack at once.
-    ///
-    /// `None` is the default, which results in an automatic selection based on
-    /// the number of literals and available CPU features.
-    pub fn avx(&mut self, yes: Option<bool>) -> &mut Builder {
-        self.avx = yes;
-        self
-    }
-
-    fn build_imp(&self, patterns: &Patterns) -> Option<Teddy> {
-        use crate::packed::teddy::runtime;
-
-        // Most of the logic here is just about selecting the optimal settings,
-        // or perhaps even rejecting construction altogether. The choices
-        // we have are: fat (avx only) or not, ssse3 or avx2, and how many
-        // patterns we allow ourselves to search. Additionally, for testing
-        // and benchmarking, we permit callers to try to "force" a setting,
-        // and if the setting isn't allowed (e.g., forcing AVX when AVX isn't
-        // available), then we bail and return nothing.
- - if patterns.len() > 64 { - return None; - } - let has_ssse3 = is_x86_feature_detected!("ssse3"); - let has_avx = is_x86_feature_detected!("avx2"); - let avx = if self.avx == Some(true) { - if !has_avx { - return None; - } - true - } else if self.avx == Some(false) { - if !has_ssse3 { - return None; - } - false - } else if !has_ssse3 && !has_avx { - return None; - } else { - has_avx - }; - let fat = match self.fat { - None => avx && patterns.len() > 32, - Some(false) => false, - Some(true) if !avx => return None, - Some(true) => true, - }; - - let mut compiler = Compiler::new(patterns, fat); - compiler.compile(); - let Compiler { buckets, masks, .. } = compiler; - // SAFETY: It is required that the builder only produce Teddy matchers - // that are allowed to run on the current CPU, since we later assume - // that the presence of (for example) TeddySlim1Mask256 means it is - // safe to call functions marked with the `avx2` target feature. - match (masks.len(), avx, fat) { - (1, false, _) => Some(Teddy { - buckets, - max_pattern_id: patterns.max_pattern_id(), - exec: runtime::Exec::TeddySlim1Mask128( - runtime::TeddySlim1Mask128 { - mask1: runtime::Mask128::new(masks[0]), - }, - ), - }), - (1, true, false) => Some(Teddy { - buckets, - max_pattern_id: patterns.max_pattern_id(), - exec: runtime::Exec::TeddySlim1Mask256( - runtime::TeddySlim1Mask256 { - mask1: runtime::Mask256::new(masks[0]), - }, - ), - }), - (1, true, true) => Some(Teddy { - buckets, - max_pattern_id: patterns.max_pattern_id(), - exec: runtime::Exec::TeddyFat1Mask256( - runtime::TeddyFat1Mask256 { - mask1: runtime::Mask256::new(masks[0]), - }, - ), - }), - (2, false, _) => Some(Teddy { - buckets, - max_pattern_id: patterns.max_pattern_id(), - exec: runtime::Exec::TeddySlim2Mask128( - runtime::TeddySlim2Mask128 { - mask1: runtime::Mask128::new(masks[0]), - mask2: runtime::Mask128::new(masks[1]), - }, - ), - }), - (2, true, false) => Some(Teddy { - buckets, - max_pattern_id: patterns.max_pattern_id(), - exec: runtime::Exec::TeddySlim2Mask256( - runtime::TeddySlim2Mask256 { - mask1: runtime::Mask256::new(masks[0]), - mask2: runtime::Mask256::new(masks[1]), - }, - ), - }), - (2, true, true) => Some(Teddy { - buckets, - max_pattern_id: patterns.max_pattern_id(), - exec: runtime::Exec::TeddyFat2Mask256( - runtime::TeddyFat2Mask256 { - mask1: runtime::Mask256::new(masks[0]), - mask2: runtime::Mask256::new(masks[1]), - }, - ), - }), - (3, false, _) => Some(Teddy { - buckets, - max_pattern_id: patterns.max_pattern_id(), - exec: runtime::Exec::TeddySlim3Mask128( - runtime::TeddySlim3Mask128 { - mask1: runtime::Mask128::new(masks[0]), - mask2: runtime::Mask128::new(masks[1]), - mask3: runtime::Mask128::new(masks[2]), - }, - ), - }), - (3, true, false) => Some(Teddy { - buckets, - max_pattern_id: patterns.max_pattern_id(), - exec: runtime::Exec::TeddySlim3Mask256( - runtime::TeddySlim3Mask256 { - mask1: runtime::Mask256::new(masks[0]), - mask2: runtime::Mask256::new(masks[1]), - mask3: runtime::Mask256::new(masks[2]), - }, - ), - }), - (3, true, true) => Some(Teddy { - buckets, - max_pattern_id: patterns.max_pattern_id(), - exec: runtime::Exec::TeddyFat3Mask256( - runtime::TeddyFat3Mask256 { - mask1: runtime::Mask256::new(masks[0]), - mask2: runtime::Mask256::new(masks[1]), - mask3: runtime::Mask256::new(masks[2]), - }, - ), - }), - _ => unreachable!(), - } - } -} - -/// A compiler is in charge of allocating patterns into buckets and generating -/// the masks necessary for searching. 
-#[derive(Clone)]
-struct Compiler<'p> {
-    patterns: &'p Patterns,
-    buckets: Vec<Vec<PatternID>>,
-    masks: Vec<Mask>,
-}
-
-impl<'p> Compiler<'p> {
-    /// Create a new Teddy compiler for the given patterns. If `fat` is true,
-    /// then 16 buckets will be used instead of 8.
-    ///
-    /// This panics if any of the patterns given are empty.
-    fn new(patterns: &'p Patterns, fat: bool) -> Compiler<'p> {
-        let mask_len = cmp::min(3, patterns.minimum_len());
-        assert!(1 <= mask_len && mask_len <= 3);
-
-        Compiler {
-            patterns,
-            buckets: vec![vec![]; if fat { 16 } else { 8 }],
-            masks: vec![Mask::default(); mask_len],
-        }
-    }
-
-    /// Compile the patterns in this compiler into buckets and masks.
-    fn compile(&mut self) {
-        let mut lonibble_to_bucket: BTreeMap<Vec<u8>, usize> = BTreeMap::new();
-        for (id, pattern) in self.patterns.iter() {
-            // We try to be slightly clever in how we assign patterns into
-            // buckets. Generally speaking, we want patterns with the same
-            // prefix to be in the same bucket, since it minimizes the amount
-            // of time we spend churning through buckets in the verification
-            // step.
-            //
-            // So we could assign patterns with the same N-prefix (where N
-            // is the size of the mask, which is one of {1, 2, 3}) to the
-            // same bucket. However, case insensitive searches are fairly
-            // common, so we'd, for example, ideally want to treat `abc` and
-            // `ABC` as if they shared the same prefix. ASCII has the nice
-            // property that the lower 4 bits of A and a are the same, so we
-            // therefore group patterns with the same low-nybble-N-prefix into
-            // the same bucket.
-            //
-            // MOREOVER, this is actually necessary for correctness! In
-            // particular, by grouping patterns with the same prefix into the
-            // same bucket, we ensure that we preserve correct leftmost-first
-            // and leftmost-longest match semantics. In addition to the fact
-            // that `patterns.iter()` iterates in the correct order, this
-            // guarantees that all possible ambiguous matches will occur in
-            // the same bucket. The verification routine could be adjusted to
-            // support correct leftmost match semantics regardless of bucket
-            // allocation, but that results in a performance hit. It's much
-            // nicer to be able to just stop as soon as a match is found.
-            let lonybs = pattern.low_nybbles(self.masks.len());
-            if let Some(&bucket) = lonibble_to_bucket.get(&lonybs) {
-                self.buckets[bucket].push(id);
-            } else {
-                // N.B. We assign buckets in reverse because it shouldn't have
-                // any influence on performance, but it does make it harder to
-                // get leftmost match semantics accidentally correct.
-                let bucket = (self.buckets.len() - 1)
-                    - (id as usize % self.buckets.len());
-                self.buckets[bucket].push(id);
-                lonibble_to_bucket.insert(lonybs, bucket);
-            }
-        }
-        for (bucket_index, bucket) in self.buckets.iter().enumerate() {
-            for &pat_id in bucket {
-                let pat = self.patterns.get(pat_id);
-                for (i, mask) in self.masks.iter_mut().enumerate() {
-                    if self.buckets.len() == 8 {
-                        mask.add_slim(bucket_index as u8, pat.bytes()[i]);
-                    } else {
-                        mask.add_fat(bucket_index as u8, pat.bytes()[i]);
-                    }
-                }
-            }
-        }
-    }
-}
-
-impl<'p> fmt::Debug for Compiler<'p> {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        let mut buckets = vec![vec![]; self.buckets.len()];
-        for (i, bucket) in self.buckets.iter().enumerate() {
-            for &patid in bucket {
-                buckets[i].push(self.patterns.get(patid));
-            }
-        }
-        f.debug_struct("Compiler")
-            .field("buckets", &buckets)
-            .field("masks", &self.masks)
-            .finish()
-    }
-}
-
-/// Mask represents the low and high nybble masks that will be used during
-/// search. Each mask is 32 bytes wide, although only the first 16 bytes are
-/// used for the SSSE3 runtime.
-///
-/// Each byte in the mask corresponds to an 8-bit bitset, where bit `i` is set
-/// if and only if the corresponding nybble is in the ith bucket. The index of
-/// the byte (0-15, inclusive) corresponds to the nybble.
-///
-/// Each mask is used as the target of a shuffle, where the indices for the
-/// shuffle are taken from the haystack. AND'ing the shuffles for both the
-/// low and high masks together also results in 8-bit bitsets, but where bit
-/// `i` is set if and only if the corresponding *byte* is in the ith bucket.
-///
-/// During compilation, masks are just arrays. But during search, these masks
-/// are represented as 128-bit or 256-bit vectors.
-///
-/// (See the README in this directory for more details.)
-#[derive(Clone, Copy, Default)]
-pub struct Mask {
-    lo: [u8; 32],
-    hi: [u8; 32],
-}
-
-impl Mask {
-    /// Update this mask by adding the given byte to the given bucket. The
-    /// given bucket must be in the range 0-7.
-    ///
-    /// This is for "slim" Teddy, where there are only 8 buckets.
-    fn add_slim(&mut self, bucket: u8, byte: u8) {
-        assert!(bucket < 8);
-
-        let byte_lo = (byte & 0xF) as usize;
-        let byte_hi = ((byte >> 4) & 0xF) as usize;
-        // When using 256-bit vectors, we need to set this bucket assignment in
-        // the low and high 128-bit portions of the mask. This allows us to
-        // process 32 bytes at a time. Namely, AVX2 shuffles operate on each
-        // of the 128-bit lanes, rather than the full 256-bit vector at once.
-        self.lo[byte_lo] |= 1 << bucket;
-        self.lo[byte_lo + 16] |= 1 << bucket;
-        self.hi[byte_hi] |= 1 << bucket;
-        self.hi[byte_hi + 16] |= 1 << bucket;
-    }
-
-    /// Update this mask by adding the given byte to the given bucket. The
-    /// given bucket must be in the range 0-15.
-    ///
-    /// This is for "fat" Teddy, where there are 16 buckets.
-    fn add_fat(&mut self, bucket: u8, byte: u8) {
-        assert!(bucket < 16);
-
-        let byte_lo = (byte & 0xF) as usize;
-        let byte_hi = ((byte >> 4) & 0xF) as usize;
-        // Unlike slim teddy, fat teddy only works with AVX2. For fat teddy,
-        // the high 128 bits of our mask correspond to buckets 8-15, while the
-        // low 128 bits correspond to buckets 0-7.
-        if bucket < 8 {
-            self.lo[byte_lo] |= 1 << bucket;
-            self.hi[byte_hi] |= 1 << bucket;
-        } else {
-            self.lo[byte_lo + 16] |= 1 << (bucket % 8);
-            self.hi[byte_hi + 16] |= 1 << (bucket % 8);
-        }
-    }
-
-    /// Return the low 128 bits of the low-nybble mask.
- pub fn lo128(&self) -> [u8; 16] { - let mut tmp = [0; 16]; - tmp.copy_from_slice(&self.lo[..16]); - tmp - } - - /// Return the full low-nybble mask. - pub fn lo256(&self) -> [u8; 32] { - self.lo - } - - /// Return the low 128 bits of the high-nybble mask. - pub fn hi128(&self) -> [u8; 16] { - let mut tmp = [0; 16]; - tmp.copy_from_slice(&self.hi[..16]); - tmp - } - - /// Return the full high-nybble mask. - pub fn hi256(&self) -> [u8; 32] { - self.hi - } -} - -impl fmt::Debug for Mask { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let (mut parts_lo, mut parts_hi) = (vec![], vec![]); - for i in 0..32 { - parts_lo.push(format!("{:02}: {:08b}", i, self.lo[i])); - parts_hi.push(format!("{:02}: {:08b}", i, self.hi[i])); - } - f.debug_struct("Mask") - .field("lo", &parts_lo) - .field("hi", &parts_hi) - .finish() - } -} diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/teddy/mod.rs libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/teddy/mod.rs --- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/teddy/mod.rs 2022-11-23 18:55:06.000000000 +0000 +++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/teddy/mod.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,62 +0,0 @@ -#[cfg(target_arch = "x86_64")] -pub use crate::packed::teddy::compile::Builder; -#[cfg(not(target_arch = "x86_64"))] -pub use crate::packed::teddy::fallback::Builder; -#[cfg(not(target_arch = "x86_64"))] -pub use crate::packed::teddy::fallback::Teddy; -#[cfg(target_arch = "x86_64")] -pub use crate::packed::teddy::runtime::Teddy; - -#[cfg(target_arch = "x86_64")] -mod compile; -#[cfg(target_arch = "x86_64")] -mod runtime; - -#[cfg(not(target_arch = "x86_64"))] -mod fallback { - use crate::packed::pattern::Patterns; - use crate::Match; - - #[derive(Clone, Debug, Default)] - pub struct Builder(()); - - impl Builder { - pub fn new() -> Builder { - Builder(()) - } - - pub fn build(&self, _: &Patterns) -> Option { - None - } - - pub fn fat(&mut self, _: Option) -> &mut Builder { - self - } - - pub fn avx(&mut self, _: Option) -> &mut Builder { - self - } - } - - #[derive(Clone, Debug)] - pub struct Teddy(()); - - impl Teddy { - pub fn find_at( - &self, - _: &Patterns, - _: &[u8], - _: usize, - ) -> Option { - None - } - - pub fn minimum_len(&self) -> usize { - 0 - } - - pub fn heap_bytes(&self) -> usize { - 0 - } - } -} diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/teddy/runtime.rs libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/teddy/runtime.rs --- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/teddy/runtime.rs 2022-11-23 18:55:06.000000000 +0000 +++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/teddy/runtime.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,1204 +0,0 @@ -// See the README in this directory for an explanation of the Teddy algorithm. -// It is strongly recommended to peruse the README before trying to grok this -// code, as its use of SIMD is pretty opaque, although I tried to add comments -// where appropriate. -// -// Moreover, while there is a lot of code in this file, most of it is -// repeated variants of the same thing. Specifically, there are three Teddy -// variants: Slim 128-bit Teddy (8 buckets), Slim 256-bit Teddy (8 buckets) -// and Fat 256-bit Teddy (16 buckets). For each variant, there are three -// implementations, corresponding to mask lengths of 1, 2 and 3. 
Bringing it to -// a total of nine variants. Each one is structured roughly the same: -// -// while at <= len(haystack) - CHUNK_SIZE: -// let candidate = find_candidate_in_chunk(haystack, at) -// if not all zeroes(candidate): -// if match = verify(haystack, at, candidate): -// return match -// -// For the most part, this remains unchanged. The parts that vary are the -// verification routine (for slim vs fat Teddy) and the candidate extraction -// (based on the number of masks). -// -// In the code below, a "candidate" corresponds to a single vector with 8-bit -// lanes. Each lane is itself an 8-bit bitset, where the ith bit is set in the -// jth lane if and only if the byte occurring at position `j` is in the -// bucket `i` (where the `j`th position is the position in the current window -// of the haystack, which is always 16 or 32 bytes). Note to be careful here: -// the ith bit and the jth lane correspond to the least significant bits of the -// vector. So when visualizing how the current window of bytes is stored in a -// vector, you often need to flip it around. For example, the text `abcd` in a -// 4-byte vector would look like this: -// -// 01100100 01100011 01100010 01100001 -// d c b a -// -// When the mask length is 1, then finding the candidate is pretty straight -// forward: you just apply the shuffle indices (from the haystack window) to -// the masks, and then AND them together, as described in the README. But for -// masks of length 2 and 3, you need to keep a little state. Specifically, -// you need to store the final 1 (for mask length 2) or 2 (for mask length 3) -// bytes of the candidate for use when searching the next window. This is for -// handling matches that span two windows. -// -// With respect to the repeated code, it would likely be possible to reduce -// the number of copies of code below using polymorphism, but I find this -// formulation clearer instead of needing to reason through generics. However, -// I admit, there may be a simpler generic construction that I'm missing. -// -// All variants are fairly heavily tested in src/packed/tests.rs. - -use std::arch::x86_64::*; -use std::mem; - -use crate::packed::pattern::{PatternID, Patterns}; -use crate::packed::teddy::compile; -use crate::packed::vector::*; -use crate::Match; - -/// The Teddy runtime. -/// -/// A Teddy runtime can be used to quickly search for occurrences of one or -/// more patterns. While it does not scale to an arbitrary number of patterns -/// like Aho-Corasick, it does find occurrences for a small set of patterns -/// much more quickly than Aho-Corasick. -/// -/// Teddy cannot run on small haystacks below a certain size, which is -/// dependent on the type of matcher used. This size can be queried via the -/// `minimum_len` method. Violating this will result in a panic. -/// -/// Finally, when callers use a Teddy runtime, they must provide precisely the -/// patterns used to construct the Teddy matcher. Violating this will result -/// in either a panic or incorrect results, but will never sacrifice memory -/// safety. -#[derive(Clone, Debug)] -pub struct Teddy { - /// The allocation of patterns in buckets. This only contains the IDs of - /// patterns. In order to do full verification, callers must provide the - /// actual patterns when using Teddy. - pub buckets: Vec>, - /// The maximum identifier of a pattern. This is used as a sanity check to - /// ensure that the patterns provided by the caller are the same as the - /// patterns that were used to compile the matcher. 
This sanity check - /// permits safely eliminating bounds checks regardless of what patterns - /// are provided by the caller. - /// - /// Note that users of the aho-corasick crate cannot get this wrong. Only - /// code internal to this crate can get it wrong, since neither `Patterns` - /// type nor the Teddy runtime are public API items. - pub max_pattern_id: PatternID, - /// The actual runtime to use. - pub exec: Exec, -} - -impl Teddy { - /// Return the first occurrence of a match in the given haystack after or - /// starting at `at`. - /// - /// The patterns provided must be precisely the same patterns given to the - /// Teddy builder, otherwise this may panic or produce incorrect results. - /// - /// All matches are consistent with the match semantics (leftmost-first or - /// leftmost-longest) set on `pats`. - pub fn find_at( - &self, - pats: &Patterns, - haystack: &[u8], - at: usize, - ) -> Option { - // This assert is a bit subtle, but it's an important guarantee. - // Namely, if the maximum pattern ID seen by Teddy is the same as the - // one in the patterns given, then we are guaranteed that every pattern - // ID in all Teddy buckets are valid indices into `pats`. While this - // is nominally true, there is no guarantee that callers provide the - // same `pats` to both the Teddy builder and the searcher, which would - // otherwise make `find_at` unsafe to call. But this assert lets us - // keep this routine safe and eliminate an important bounds check in - // verification. - assert_eq!( - self.max_pattern_id, - pats.max_pattern_id(), - "teddy must be called with same patterns it was built with", - ); - // SAFETY: The haystack must have at least a minimum number of bytes - // for Teddy to be able to work. The minimum number varies depending on - // which matcher is used below. If this is violated, then it's possible - // for searching to do out-of-bounds writes. - assert!(haystack[at..].len() >= self.minimum_len()); - // SAFETY: The various Teddy matchers are always safe to call because - // the Teddy builder guarantees that a particular Exec variant is - // built only when it can be run the current CPU. That is, the Teddy - // builder will not produce a Exec::TeddySlim1Mask256 unless AVX2 is - // enabled. That is, our dynamic CPU feature detection is performed - // once in the builder, and we rely on the type system to avoid needing - // to do it again. - unsafe { - match self.exec { - Exec::TeddySlim1Mask128(ref e) => { - e.find_at(pats, self, haystack, at) - } - Exec::TeddySlim1Mask256(ref e) => { - e.find_at(pats, self, haystack, at) - } - Exec::TeddyFat1Mask256(ref e) => { - e.find_at(pats, self, haystack, at) - } - Exec::TeddySlim2Mask128(ref e) => { - e.find_at(pats, self, haystack, at) - } - Exec::TeddySlim2Mask256(ref e) => { - e.find_at(pats, self, haystack, at) - } - Exec::TeddyFat2Mask256(ref e) => { - e.find_at(pats, self, haystack, at) - } - Exec::TeddySlim3Mask128(ref e) => { - e.find_at(pats, self, haystack, at) - } - Exec::TeddySlim3Mask256(ref e) => { - e.find_at(pats, self, haystack, at) - } - Exec::TeddyFat3Mask256(ref e) => { - e.find_at(pats, self, haystack, at) - } - } - } - } - - /// Returns the minimum length of a haystack that must be provided by - /// callers to this Teddy searcher. Providing a haystack shorter than this - /// will result in a panic, but will never violate memory safety. - pub fn minimum_len(&self) -> usize { - // SAFETY: These values must be correct in order to ensure safety. 
- // The Teddy runtime assumes their haystacks have at least these - // lengths. Violating this will sacrifice memory safety. - match self.exec { - Exec::TeddySlim1Mask128(_) => 16, - Exec::TeddySlim1Mask256(_) => 32, - Exec::TeddyFat1Mask256(_) => 16, - Exec::TeddySlim2Mask128(_) => 17, - Exec::TeddySlim2Mask256(_) => 33, - Exec::TeddyFat2Mask256(_) => 17, - Exec::TeddySlim3Mask128(_) => 18, - Exec::TeddySlim3Mask256(_) => 34, - Exec::TeddyFat3Mask256(_) => 34, - } - } - - /// Returns the approximate total amount of heap used by this searcher, in - /// units of bytes. - pub fn heap_bytes(&self) -> usize { - let num_patterns = self.max_pattern_id as usize + 1; - self.buckets.len() * mem::size_of::>() - + num_patterns * mem::size_of::() - } - - /// Runs the verification routine for Slim 128-bit Teddy. - /// - /// The candidate given should be a collection of 8-bit bitsets (one bitset - /// per lane), where the ith bit is set in the jth lane if and only if the - /// byte occurring at `at + j` in `haystack` is in the bucket `i`. - /// - /// This is not safe to call unless the SSSE3 target feature is enabled. - /// The `target_feature` attribute is not applied since this function is - /// always forcefully inlined. - #[inline(always)] - unsafe fn verify128( - &self, - pats: &Patterns, - haystack: &[u8], - at: usize, - cand: __m128i, - ) -> Option { - debug_assert!(!is_all_zeroes128(cand)); - debug_assert_eq!(8, self.buckets.len()); - - // Convert the candidate into 64-bit chunks, and then verify each of - // those chunks. - let parts = unpack64x128(cand); - for (i, &part) in parts.iter().enumerate() { - let pos = at + i * 8; - if let Some(m) = self.verify64(pats, 8, haystack, pos, part) { - return Some(m); - } - } - None - } - - /// Runs the verification routine for Slim 256-bit Teddy. - /// - /// The candidate given should be a collection of 8-bit bitsets (one bitset - /// per lane), where the ith bit is set in the jth lane if and only if the - /// byte occurring at `at + j` in `haystack` is in the bucket `i`. - /// - /// This is not safe to call unless the AVX2 target feature is enabled. - /// The `target_feature` attribute is not applied since this function is - /// always forcefully inlined. - #[inline(always)] - unsafe fn verify256( - &self, - pats: &Patterns, - haystack: &[u8], - at: usize, - cand: __m256i, - ) -> Option { - debug_assert!(!is_all_zeroes256(cand)); - debug_assert_eq!(8, self.buckets.len()); - - // Convert the candidate into 64-bit chunks, and then verify each of - // those chunks. - let parts = unpack64x256(cand); - for (i, &part) in parts.iter().enumerate() { - let pos = at + i * 8; - if let Some(m) = self.verify64(pats, 8, haystack, pos, part) { - return Some(m); - } - } - None - } - - /// Runs the verification routine for Fat 256-bit Teddy. - /// - /// The candidate given should be a collection of 8-bit bitsets (one bitset - /// per lane), where the ith bit is set in the jth lane if and only if the - /// byte occurring at `at + (j < 16 ? j : j - 16)` in `haystack` is in the - /// bucket `j < 16 ? i : i + 8`. - /// - /// This is not safe to call unless the AVX2 target feature is enabled. - /// The `target_feature` attribute is not applied since this function is - /// always forcefully inlined. 
- #[inline(always)] - unsafe fn verify_fat256( - &self, - pats: &Patterns, - haystack: &[u8], - at: usize, - cand: __m256i, - ) -> Option { - debug_assert!(!is_all_zeroes256(cand)); - debug_assert_eq!(16, self.buckets.len()); - - // This is a bit tricky, but we basically want to convert our - // candidate, which looks like this - // - // a31 a30 ... a17 a16 a15 a14 ... a01 a00 - // - // where each a(i) is an 8-bit bitset corresponding to the activated - // buckets, to this - // - // a31 a15 a30 a14 a29 a13 ... a18 a02 a17 a01 a16 a00 - // - // Namely, for Fat Teddy, the high 128-bits of the candidate correspond - // to the same bytes in the haystack in the low 128-bits (so we only - // scan 16 bytes at a time), but are for buckets 8-15 instead of 0-7. - // - // The verification routine wants to look at all potentially matching - // buckets before moving on to the next lane. So for example, both - // a16 and a00 both correspond to the first byte in our window; a00 - // contains buckets 0-7 and a16 contains buckets 8-15. Specifically, - // a16 should be checked before a01. So the transformation shown above - // allows us to use our normal verification procedure with one small - // change: we treat each bitset as 16 bits instead of 8 bits. - - // Swap the 128-bit lanes in the candidate vector. - let swap = _mm256_permute4x64_epi64(cand, 0x4E); - // Interleave the bytes from the low 128-bit lanes, starting with - // cand first. - let r1 = _mm256_unpacklo_epi8(cand, swap); - // Interleave the bytes from the high 128-bit lanes, starting with - // cand first. - let r2 = _mm256_unpackhi_epi8(cand, swap); - // Now just take the 2 low 64-bit integers from both r1 and r2. We - // can drop the high 64-bit integers because they are a mirror image - // of the low 64-bit integers. All we care about are the low 128-bit - // lanes of r1 and r2. Combined, they contain all our 16-bit bitsets - // laid out in the desired order, as described above. - let parts = unpacklo64x256(r1, r2); - for (i, &part) in parts.iter().enumerate() { - let pos = at + i * 4; - if let Some(m) = self.verify64(pats, 16, haystack, pos, part) { - return Some(m); - } - } - None - } - - /// Verify whether there are any matches starting at or after `at` in the - /// given `haystack`. The candidate given should correspond to either 8-bit - /// (for 8 buckets) or 16-bit (16 buckets) bitsets. - #[inline(always)] - fn verify64( - &self, - pats: &Patterns, - bucket_count: usize, - haystack: &[u8], - at: usize, - mut cand: u64, - ) -> Option { - // N.B. While the bucket count is known from self.buckets.len(), - // requiring it as a parameter makes it easier for the optimizer to - // know its value, and thus produce more efficient codegen. - debug_assert!(bucket_count == 8 || bucket_count == 16); - while cand != 0 { - let bit = cand.trailing_zeros() as usize; - cand &= !(1 << bit); - - let at = at + (bit / bucket_count); - let bucket = bit % bucket_count; - if let Some(m) = self.verify_bucket(pats, haystack, bucket, at) { - return Some(m); - } - } - None - } - - /// Verify whether there are any matches starting at `at` in the given - /// `haystack` corresponding only to patterns in the given bucket. - #[inline(always)] - fn verify_bucket( - &self, - pats: &Patterns, - haystack: &[u8], - bucket: usize, - at: usize, - ) -> Option { - // Forcing this function to not inline and be "cold" seems to help - // the codegen for Teddy overall. 
Interestingly, this is good for a - // 16% boost in the sherlock/packed/teddy/name/alt1 benchmark (among - // others). Overall, this seems like a problem with codegen, since - // creating the Match itself is a very small amount of code. - #[cold] - #[inline(never)] - fn match_from_span( - pati: PatternID, - start: usize, - end: usize, - ) -> Match { - Match::from_span(pati as usize, start, end) - } - - // N.B. The bounds check for this bucket lookup *should* be elided - // since we assert the number of buckets in each `find_at` routine, - // and the compiler can prove that the `% 8` (or `% 16`) in callers - // of this routine will always be in bounds. - for &pati in &self.buckets[bucket] { - // SAFETY: This is safe because we are guaranteed that every - // index in a Teddy bucket is a valid index into `pats`. This - // guarantee is upheld by the assert checking `max_pattern_id` in - // the beginning of `find_at` above. - // - // This explicit bounds check elision is (amazingly) good for a - // 25-50% boost in some benchmarks, particularly ones with a lot - // of short literals. - let pat = unsafe { pats.get_unchecked(pati) }; - if pat.is_prefix(&haystack[at..]) { - return Some(match_from_span(pati, at, at + pat.len())); - } - } - None - } -} - -/// Exec represents the different search strategies supported by the Teddy -/// runtime. -/// -/// This enum is an important safety abstraction. Namely, callers should only -/// construct a variant in this enum if it is safe to execute its corresponding -/// target features on the current CPU. The 128-bit searchers require SSSE3, -/// while the 256-bit searchers require AVX2. -#[derive(Clone, Debug)] -pub enum Exec { - TeddySlim1Mask128(TeddySlim1Mask128), - TeddySlim1Mask256(TeddySlim1Mask256), - TeddyFat1Mask256(TeddyFat1Mask256), - TeddySlim2Mask128(TeddySlim2Mask128), - TeddySlim2Mask256(TeddySlim2Mask256), - TeddyFat2Mask256(TeddyFat2Mask256), - TeddySlim3Mask128(TeddySlim3Mask128), - TeddySlim3Mask256(TeddySlim3Mask256), - TeddyFat3Mask256(TeddyFat3Mask256), -} - -// Most of the code below remains undocumented because they are effectively -// repeated versions of themselves. The general structure is described in the -// README and in the comments above. - -#[derive(Clone, Debug)] -pub struct TeddySlim1Mask128 { - pub mask1: Mask128, -} - -impl TeddySlim1Mask128 { - #[target_feature(enable = "ssse3")] - unsafe fn find_at( - &self, - pats: &Patterns, - teddy: &Teddy, - haystack: &[u8], - mut at: usize, - ) -> Option { - debug_assert!(haystack[at..].len() >= teddy.minimum_len()); - // This assert helps eliminate bounds checks for bucket lookups in - // Teddy::verify_bucket, which has a small (3-4%) performance boost. 
- assert_eq!(8, teddy.buckets.len()); - - let len = haystack.len(); - while at <= len - 16 { - let c = self.candidate(haystack, at); - if !is_all_zeroes128(c) { - if let Some(m) = teddy.verify128(pats, haystack, at, c) { - return Some(m); - } - } - at += 16; - } - if at < len { - at = len - 16; - let c = self.candidate(haystack, at); - if !is_all_zeroes128(c) { - if let Some(m) = teddy.verify128(pats, haystack, at, c) { - return Some(m); - } - } - } - None - } - - #[inline(always)] - unsafe fn candidate(&self, haystack: &[u8], at: usize) -> __m128i { - debug_assert!(haystack[at..].len() >= 16); - - let chunk = loadu128(haystack, at); - members1m128(chunk, self.mask1) - } -} - -#[derive(Clone, Debug)] -pub struct TeddySlim1Mask256 { - pub mask1: Mask256, -} - -impl TeddySlim1Mask256 { - #[target_feature(enable = "avx2")] - unsafe fn find_at( - &self, - pats: &Patterns, - teddy: &Teddy, - haystack: &[u8], - mut at: usize, - ) -> Option { - debug_assert!(haystack[at..].len() >= teddy.minimum_len()); - // This assert helps eliminate bounds checks for bucket lookups in - // Teddy::verify_bucket, which has a small (3-4%) performance boost. - assert_eq!(8, teddy.buckets.len()); - - let len = haystack.len(); - while at <= len - 32 { - let c = self.candidate(haystack, at); - if !is_all_zeroes256(c) { - if let Some(m) = teddy.verify256(pats, haystack, at, c) { - return Some(m); - } - } - at += 32; - } - if at < len { - at = len - 32; - let c = self.candidate(haystack, at); - if !is_all_zeroes256(c) { - if let Some(m) = teddy.verify256(pats, haystack, at, c) { - return Some(m); - } - } - } - None - } - - #[inline(always)] - unsafe fn candidate(&self, haystack: &[u8], at: usize) -> __m256i { - debug_assert!(haystack[at..].len() >= 32); - - let chunk = loadu256(haystack, at); - members1m256(chunk, self.mask1) - } -} - -#[derive(Clone, Debug)] -pub struct TeddyFat1Mask256 { - pub mask1: Mask256, -} - -impl TeddyFat1Mask256 { - #[target_feature(enable = "avx2")] - unsafe fn find_at( - &self, - pats: &Patterns, - teddy: &Teddy, - haystack: &[u8], - mut at: usize, - ) -> Option { - debug_assert!(haystack[at..].len() >= teddy.minimum_len()); - // This assert helps eliminate bounds checks for bucket lookups in - // Teddy::verify_bucket, which has a small (3-4%) performance boost. - assert_eq!(16, teddy.buckets.len()); - - let len = haystack.len(); - while at <= len - 16 { - let c = self.candidate(haystack, at); - if !is_all_zeroes256(c) { - if let Some(m) = teddy.verify_fat256(pats, haystack, at, c) { - return Some(m); - } - } - at += 16; - } - if at < len { - at = len - 16; - let c = self.candidate(haystack, at); - if !is_all_zeroes256(c) { - if let Some(m) = teddy.verify_fat256(pats, haystack, at, c) { - return Some(m); - } - } - } - None - } - - #[inline(always)] - unsafe fn candidate(&self, haystack: &[u8], at: usize) -> __m256i { - debug_assert!(haystack[at..].len() >= 16); - - let chunk = _mm256_broadcastsi128_si256(loadu128(haystack, at)); - members1m256(chunk, self.mask1) - } -} - -#[derive(Clone, Debug)] -pub struct TeddySlim2Mask128 { - pub mask1: Mask128, - pub mask2: Mask128, -} - -impl TeddySlim2Mask128 { - #[target_feature(enable = "ssse3")] - unsafe fn find_at( - &self, - pats: &Patterns, - teddy: &Teddy, - haystack: &[u8], - mut at: usize, - ) -> Option { - debug_assert!(haystack[at..].len() >= teddy.minimum_len()); - // This assert helps eliminate bounds checks for bucket lookups in - // Teddy::verify_bucket, which has a small (3-4%) performance boost. 
- assert_eq!(8, teddy.buckets.len()); - - at += 1; - let len = haystack.len(); - let mut prev0 = ones128(); - while at <= len - 16 { - let c = self.candidate(haystack, at, &mut prev0); - if !is_all_zeroes128(c) { - if let Some(m) = teddy.verify128(pats, haystack, at - 1, c) { - return Some(m); - } - } - at += 16; - } - if at < len { - at = len - 16; - prev0 = ones128(); - - let c = self.candidate(haystack, at, &mut prev0); - if !is_all_zeroes128(c) { - if let Some(m) = teddy.verify128(pats, haystack, at - 1, c) { - return Some(m); - } - } - } - None - } - - #[inline(always)] - unsafe fn candidate( - &self, - haystack: &[u8], - at: usize, - prev0: &mut __m128i, - ) -> __m128i { - debug_assert!(haystack[at..].len() >= 16); - - let chunk = loadu128(haystack, at); - let (res0, res1) = members2m128(chunk, self.mask1, self.mask2); - let res0prev0 = _mm_alignr_epi8(res0, *prev0, 15); - _mm_and_si128(res0prev0, res1) - } -} - -#[derive(Clone, Debug)] -pub struct TeddySlim2Mask256 { - pub mask1: Mask256, - pub mask2: Mask256, -} - -impl TeddySlim2Mask256 { - #[target_feature(enable = "avx2")] - unsafe fn find_at( - &self, - pats: &Patterns, - teddy: &Teddy, - haystack: &[u8], - mut at: usize, - ) -> Option { - debug_assert!(haystack[at..].len() >= teddy.minimum_len()); - // This assert helps eliminate bounds checks for bucket lookups in - // Teddy::verify_bucket, which has a small (3-4%) performance boost. - assert_eq!(8, teddy.buckets.len()); - - at += 1; - let len = haystack.len(); - let mut prev0 = ones256(); - while at <= len - 32 { - let c = self.candidate(haystack, at, &mut prev0); - if !is_all_zeroes256(c) { - if let Some(m) = teddy.verify256(pats, haystack, at - 1, c) { - return Some(m); - } - } - at += 32; - } - if at < len { - at = len - 32; - prev0 = ones256(); - - let c = self.candidate(haystack, at, &mut prev0); - if !is_all_zeroes256(c) { - if let Some(m) = teddy.verify256(pats, haystack, at - 1, c) { - return Some(m); - } - } - } - None - } - - #[inline(always)] - unsafe fn candidate( - &self, - haystack: &[u8], - at: usize, - prev0: &mut __m256i, - ) -> __m256i { - debug_assert!(haystack[at..].len() >= 32); - - let chunk = loadu256(haystack, at); - let (res0, res1) = members2m256(chunk, self.mask1, self.mask2); - let res0prev0 = alignr256_15(res0, *prev0); - let res = _mm256_and_si256(res0prev0, res1); - *prev0 = res0; - res - } -} - -#[derive(Clone, Debug)] -pub struct TeddyFat2Mask256 { - pub mask1: Mask256, - pub mask2: Mask256, -} - -impl TeddyFat2Mask256 { - #[target_feature(enable = "avx2")] - unsafe fn find_at( - &self, - pats: &Patterns, - teddy: &Teddy, - haystack: &[u8], - mut at: usize, - ) -> Option { - debug_assert!(haystack[at..].len() >= teddy.minimum_len()); - // This assert helps eliminate bounds checks for bucket lookups in - // Teddy::verify_bucket, which has a small (3-4%) performance boost. 
- assert_eq!(16, teddy.buckets.len()); - - at += 1; - let len = haystack.len(); - let mut prev0 = ones256(); - while at <= len - 16 { - let c = self.candidate(haystack, at, &mut prev0); - if !is_all_zeroes256(c) { - if let Some(m) = teddy.verify_fat256(pats, haystack, at - 1, c) - { - return Some(m); - } - } - at += 16; - } - if at < len { - at = len - 16; - prev0 = ones256(); - - let c = self.candidate(haystack, at, &mut prev0); - if !is_all_zeroes256(c) { - if let Some(m) = teddy.verify_fat256(pats, haystack, at - 1, c) - { - return Some(m); - } - } - } - None - } - - #[inline(always)] - unsafe fn candidate( - &self, - haystack: &[u8], - at: usize, - prev0: &mut __m256i, - ) -> __m256i { - debug_assert!(haystack[at..].len() >= 16); - - let chunk = _mm256_broadcastsi128_si256(loadu128(haystack, at)); - let (res0, res1) = members2m256(chunk, self.mask1, self.mask2); - let res0prev0 = _mm256_alignr_epi8(res0, *prev0, 15); - let res = _mm256_and_si256(res0prev0, res1); - *prev0 = res0; - res - } -} - -#[derive(Clone, Debug)] -pub struct TeddySlim3Mask128 { - pub mask1: Mask128, - pub mask2: Mask128, - pub mask3: Mask128, -} - -impl TeddySlim3Mask128 { - #[target_feature(enable = "ssse3")] - unsafe fn find_at( - &self, - pats: &Patterns, - teddy: &Teddy, - haystack: &[u8], - mut at: usize, - ) -> Option { - debug_assert!(haystack[at..].len() >= teddy.minimum_len()); - // This assert helps eliminate bounds checks for bucket lookups in - // Teddy::verify_bucket, which has a small (3-4%) performance boost. - assert_eq!(8, teddy.buckets.len()); - - at += 2; - let len = haystack.len(); - let (mut prev0, mut prev1) = (ones128(), ones128()); - while at <= len - 16 { - let c = self.candidate(haystack, at, &mut prev0, &mut prev1); - if !is_all_zeroes128(c) { - if let Some(m) = teddy.verify128(pats, haystack, at - 2, c) { - return Some(m); - } - } - at += 16; - } - if at < len { - at = len - 16; - prev0 = ones128(); - prev1 = ones128(); - - let c = self.candidate(haystack, at, &mut prev0, &mut prev1); - if !is_all_zeroes128(c) { - if let Some(m) = teddy.verify128(pats, haystack, at - 2, c) { - return Some(m); - } - } - } - None - } - - #[inline(always)] - unsafe fn candidate( - &self, - haystack: &[u8], - at: usize, - prev0: &mut __m128i, - prev1: &mut __m128i, - ) -> __m128i { - debug_assert!(haystack[at..].len() >= 16); - - let chunk = loadu128(haystack, at); - let (res0, res1, res2) = - members3m128(chunk, self.mask1, self.mask2, self.mask3); - let res0prev0 = _mm_alignr_epi8(res0, *prev0, 14); - let res1prev1 = _mm_alignr_epi8(res1, *prev1, 15); - let res = _mm_and_si128(_mm_and_si128(res0prev0, res1prev1), res2); - *prev0 = res0; - *prev1 = res1; - res - } -} - -#[derive(Clone, Debug)] -pub struct TeddySlim3Mask256 { - pub mask1: Mask256, - pub mask2: Mask256, - pub mask3: Mask256, -} - -impl TeddySlim3Mask256 { - #[target_feature(enable = "avx2")] - unsafe fn find_at( - &self, - pats: &Patterns, - teddy: &Teddy, - haystack: &[u8], - mut at: usize, - ) -> Option { - debug_assert!(haystack[at..].len() >= teddy.minimum_len()); - // This assert helps eliminate bounds checks for bucket lookups in - // Teddy::verify_bucket, which has a small (3-4%) performance boost. 
- assert_eq!(8, teddy.buckets.len()); - - at += 2; - let len = haystack.len(); - let (mut prev0, mut prev1) = (ones256(), ones256()); - while at <= len - 32 { - let c = self.candidate(haystack, at, &mut prev0, &mut prev1); - if !is_all_zeroes256(c) { - if let Some(m) = teddy.verify256(pats, haystack, at - 2, c) { - return Some(m); - } - } - at += 32; - } - if at < len { - at = len - 32; - prev0 = ones256(); - prev1 = ones256(); - - let c = self.candidate(haystack, at, &mut prev0, &mut prev1); - if !is_all_zeroes256(c) { - if let Some(m) = teddy.verify256(pats, haystack, at - 2, c) { - return Some(m); - } - } - } - None - } - - #[inline(always)] - unsafe fn candidate( - &self, - haystack: &[u8], - at: usize, - prev0: &mut __m256i, - prev1: &mut __m256i, - ) -> __m256i { - debug_assert!(haystack[at..].len() >= 32); - - let chunk = loadu256(haystack, at); - let (res0, res1, res2) = - members3m256(chunk, self.mask1, self.mask2, self.mask3); - let res0prev0 = alignr256_14(res0, *prev0); - let res1prev1 = alignr256_15(res1, *prev1); - let res = - _mm256_and_si256(_mm256_and_si256(res0prev0, res1prev1), res2); - *prev0 = res0; - *prev1 = res1; - res - } -} - -#[derive(Clone, Debug)] -pub struct TeddyFat3Mask256 { - pub mask1: Mask256, - pub mask2: Mask256, - pub mask3: Mask256, -} - -impl TeddyFat3Mask256 { - #[target_feature(enable = "avx2")] - unsafe fn find_at( - &self, - pats: &Patterns, - teddy: &Teddy, - haystack: &[u8], - mut at: usize, - ) -> Option { - debug_assert!(haystack[at..].len() >= teddy.minimum_len()); - // This assert helps eliminate bounds checks for bucket lookups in - // Teddy::verify_bucket, which has a small (3-4%) performance boost. - assert_eq!(16, teddy.buckets.len()); - - at += 2; - let len = haystack.len(); - let (mut prev0, mut prev1) = (ones256(), ones256()); - while at <= len - 16 { - let c = self.candidate(haystack, at, &mut prev0, &mut prev1); - if !is_all_zeroes256(c) { - if let Some(m) = teddy.verify_fat256(pats, haystack, at - 2, c) - { - return Some(m); - } - } - at += 16; - } - if at < len { - at = len - 16; - prev0 = ones256(); - prev1 = ones256(); - - let c = self.candidate(haystack, at, &mut prev0, &mut prev1); - if !is_all_zeroes256(c) { - if let Some(m) = teddy.verify_fat256(pats, haystack, at - 2, c) - { - return Some(m); - } - } - } - None - } - - #[inline(always)] - unsafe fn candidate( - &self, - haystack: &[u8], - at: usize, - prev0: &mut __m256i, - prev1: &mut __m256i, - ) -> __m256i { - debug_assert!(haystack[at..].len() >= 16); - - let chunk = _mm256_broadcastsi128_si256(loadu128(haystack, at)); - let (res0, res1, res2) = - members3m256(chunk, self.mask1, self.mask2, self.mask3); - let res0prev0 = _mm256_alignr_epi8(res0, *prev0, 14); - let res1prev1 = _mm256_alignr_epi8(res1, *prev1, 15); - let res = - _mm256_and_si256(_mm256_and_si256(res0prev0, res1prev1), res2); - *prev0 = res0; - *prev1 = res1; - res - } -} - -/// A 128-bit mask for the low and high nybbles in a set of patterns. Each -/// lane `j` corresponds to a bitset where the `i`th bit is set if and only if -/// the nybble `j` is in the bucket `i` at a particular position. -#[derive(Clone, Copy, Debug)] -pub struct Mask128 { - lo: __m128i, - hi: __m128i, -} - -impl Mask128 { - /// Create a new SIMD mask from the mask produced by the Teddy builder. - pub fn new(mask: compile::Mask) -> Mask128 { - // SAFETY: This is safe since [u8; 16] has the same representation - // as __m128i. 
- unsafe { - Mask128 { - lo: mem::transmute(mask.lo128()), - hi: mem::transmute(mask.hi128()), - } - } - } -} - -/// A 256-bit mask for the low and high nybbles in a set of patterns. Each -/// lane `j` corresponds to a bitset where the `i`th bit is set if and only if -/// the nybble `j` is in the bucket `i` at a particular position. -/// -/// This is slightly tweaked dependending on whether Slim or Fat Teddy is being -/// used. For Slim Teddy, the bitsets in the lower 128-bits are the same as -/// the bitsets in the higher 128-bits, so that we can search 32 bytes at a -/// time. (Remember, the nybbles in the haystack are used as indices into these -/// masks, and 256-bit shuffles only operate on 128-bit lanes.) -/// -/// For Fat Teddy, the bitsets are not repeated, but instead, the high 128 -/// bits correspond to buckets 8-15. So that a bitset `00100010` has buckets -/// 1 and 5 set if it's in the lower 128 bits, but has buckets 9 and 13 set -/// if it's in the higher 128 bits. -#[derive(Clone, Copy, Debug)] -pub struct Mask256 { - lo: __m256i, - hi: __m256i, -} - -impl Mask256 { - /// Create a new SIMD mask from the mask produced by the Teddy builder. - pub fn new(mask: compile::Mask) -> Mask256 { - // SAFETY: This is safe since [u8; 32] has the same representation - // as __m256i. - unsafe { - Mask256 { - lo: mem::transmute(mask.lo256()), - hi: mem::transmute(mask.hi256()), - } - } - } -} - -// The "members" routines below are responsible for taking a chunk of bytes, -// a number of nybble masks and returning the result of using the masks to -// lookup bytes in the chunk. The results of the high and low nybble masks are -// AND'ed together, such that each candidate returned is a vector, with byte -// sized lanes, and where each lane is an 8-bit bitset corresponding to the -// buckets that contain the corresponding byte. -// -// In the case of masks of length greater than 1, callers will need to keep -// the results from the previous haystack's window, and then shift the vectors -// so that they all line up. Then they can be AND'ed together. - -/// Return a candidate for Slim 128-bit Teddy, where `chunk` corresponds to a -/// 16-byte window of the haystack (where the least significant byte -/// corresponds to the start of the window), and `mask1` corresponds to a -/// low/high mask for the first byte of all patterns that are being searched. -#[target_feature(enable = "ssse3")] -unsafe fn members1m128(chunk: __m128i, mask1: Mask128) -> __m128i { - let lomask = _mm_set1_epi8(0xF); - let hlo = _mm_and_si128(chunk, lomask); - let hhi = _mm_and_si128(_mm_srli_epi16(chunk, 4), lomask); - _mm_and_si128( - _mm_shuffle_epi8(mask1.lo, hlo), - _mm_shuffle_epi8(mask1.hi, hhi), - ) -} - -/// Return a candidate for Slim 256-bit Teddy, where `chunk` corresponds to a -/// 32-byte window of the haystack (where the least significant byte -/// corresponds to the start of the window), and `mask1` corresponds to a -/// low/high mask for the first byte of all patterns that are being searched. -/// -/// Note that this can also be used for Fat Teddy, where the high 128 bits in -/// `chunk` is the same as the low 128 bits, which corresponds to a 16 byte -/// window in the haystack. 
-#[target_feature(enable = "avx2")] -unsafe fn members1m256(chunk: __m256i, mask1: Mask256) -> __m256i { - let lomask = _mm256_set1_epi8(0xF); - let hlo = _mm256_and_si256(chunk, lomask); - let hhi = _mm256_and_si256(_mm256_srli_epi16(chunk, 4), lomask); - _mm256_and_si256( - _mm256_shuffle_epi8(mask1.lo, hlo), - _mm256_shuffle_epi8(mask1.hi, hhi), - ) -} - -/// Return candidates for Slim 128-bit Teddy, where `chunk` corresponds -/// to a 16-byte window of the haystack (where the least significant byte -/// corresponds to the start of the window), and the masks correspond to a -/// low/high mask for the first and second bytes of all patterns that are being -/// searched. The vectors returned correspond to candidates for the first and -/// second bytes in the patterns represented by the masks. -#[target_feature(enable = "ssse3")] -unsafe fn members2m128( - chunk: __m128i, - mask1: Mask128, - mask2: Mask128, -) -> (__m128i, __m128i) { - let lomask = _mm_set1_epi8(0xF); - let hlo = _mm_and_si128(chunk, lomask); - let hhi = _mm_and_si128(_mm_srli_epi16(chunk, 4), lomask); - let res0 = _mm_and_si128( - _mm_shuffle_epi8(mask1.lo, hlo), - _mm_shuffle_epi8(mask1.hi, hhi), - ); - let res1 = _mm_and_si128( - _mm_shuffle_epi8(mask2.lo, hlo), - _mm_shuffle_epi8(mask2.hi, hhi), - ); - (res0, res1) -} - -/// Return candidates for Slim 256-bit Teddy, where `chunk` corresponds -/// to a 32-byte window of the haystack (where the least significant byte -/// corresponds to the start of the window), and the masks correspond to a -/// low/high mask for the first and second bytes of all patterns that are being -/// searched. The vectors returned correspond to candidates for the first and -/// second bytes in the patterns represented by the masks. -/// -/// Note that this can also be used for Fat Teddy, where the high 128 bits in -/// `chunk` is the same as the low 128 bits, which corresponds to a 16 byte -/// window in the haystack. -#[target_feature(enable = "avx2")] -unsafe fn members2m256( - chunk: __m256i, - mask1: Mask256, - mask2: Mask256, -) -> (__m256i, __m256i) { - let lomask = _mm256_set1_epi8(0xF); - let hlo = _mm256_and_si256(chunk, lomask); - let hhi = _mm256_and_si256(_mm256_srli_epi16(chunk, 4), lomask); - let res0 = _mm256_and_si256( - _mm256_shuffle_epi8(mask1.lo, hlo), - _mm256_shuffle_epi8(mask1.hi, hhi), - ); - let res1 = _mm256_and_si256( - _mm256_shuffle_epi8(mask2.lo, hlo), - _mm256_shuffle_epi8(mask2.hi, hhi), - ); - (res0, res1) -} - -/// Return candidates for Slim 128-bit Teddy, where `chunk` corresponds -/// to a 16-byte window of the haystack (where the least significant byte -/// corresponds to the start of the window), and the masks correspond to a -/// low/high mask for the first, second and third bytes of all patterns that -/// are being searched. The vectors returned correspond to candidates for the -/// first, second and third bytes in the patterns represented by the masks. 
-#[target_feature(enable = "ssse3")] -unsafe fn members3m128( - chunk: __m128i, - mask1: Mask128, - mask2: Mask128, - mask3: Mask128, -) -> (__m128i, __m128i, __m128i) { - let lomask = _mm_set1_epi8(0xF); - let hlo = _mm_and_si128(chunk, lomask); - let hhi = _mm_and_si128(_mm_srli_epi16(chunk, 4), lomask); - let res0 = _mm_and_si128( - _mm_shuffle_epi8(mask1.lo, hlo), - _mm_shuffle_epi8(mask1.hi, hhi), - ); - let res1 = _mm_and_si128( - _mm_shuffle_epi8(mask2.lo, hlo), - _mm_shuffle_epi8(mask2.hi, hhi), - ); - let res2 = _mm_and_si128( - _mm_shuffle_epi8(mask3.lo, hlo), - _mm_shuffle_epi8(mask3.hi, hhi), - ); - (res0, res1, res2) -} - -/// Return candidates for Slim 256-bit Teddy, where `chunk` corresponds -/// to a 32-byte window of the haystack (where the least significant byte -/// corresponds to the start of the window), and the masks correspond to a -/// low/high mask for the first, second and third bytes of all patterns that -/// are being searched. The vectors returned correspond to candidates for the -/// first, second and third bytes in the patterns represented by the masks. -/// -/// Note that this can also be used for Fat Teddy, where the high 128 bits in -/// `chunk` is the same as the low 128 bits, which corresponds to a 16 byte -/// window in the haystack. -#[target_feature(enable = "avx2")] -unsafe fn members3m256( - chunk: __m256i, - mask1: Mask256, - mask2: Mask256, - mask3: Mask256, -) -> (__m256i, __m256i, __m256i) { - let lomask = _mm256_set1_epi8(0xF); - let hlo = _mm256_and_si256(chunk, lomask); - let hhi = _mm256_and_si256(_mm256_srli_epi16(chunk, 4), lomask); - let res0 = _mm256_and_si256( - _mm256_shuffle_epi8(mask1.lo, hlo), - _mm256_shuffle_epi8(mask1.hi, hhi), - ); - let res1 = _mm256_and_si256( - _mm256_shuffle_epi8(mask2.lo, hlo), - _mm256_shuffle_epi8(mask2.hi, hhi), - ); - let res2 = _mm256_and_si256( - _mm256_shuffle_epi8(mask3.lo, hlo), - _mm256_shuffle_epi8(mask3.hi, hhi), - ); - (res0, res1, res2) -} diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/tests.rs libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/tests.rs --- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/tests.rs 2022-11-23 18:55:06.000000000 +0000 +++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/tests.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,568 +0,0 @@ -use std::collections::HashMap; -use std::usize; - -use crate::packed::{Config, MatchKind}; -use crate::Match; - -/// A description of a single test against a multi-pattern searcher. -/// -/// A single test may not necessarily pass on every configuration of a -/// searcher. The tests are categorized and grouped appropriately below. -#[derive(Clone, Debug, Eq, PartialEq)] -struct SearchTest { - /// The name of this test, for debugging. - name: &'static str, - /// The patterns to search for. - patterns: &'static [&'static str], - /// The text to search. - haystack: &'static str, - /// Each match is a triple of (pattern_index, start, end), where - /// pattern_index is an index into `patterns` and `start`/`end` are indices - /// into `haystack`. 
- matches: &'static [(usize, usize, usize)], -} - -struct SearchTestOwned { - offset: usize, - name: String, - patterns: Vec, - haystack: String, - matches: Vec<(usize, usize, usize)>, -} - -impl SearchTest { - fn variations(&self) -> Vec { - let mut tests = vec![]; - for i in 0..=260 { - tests.push(self.offset_prefix(i)); - tests.push(self.offset_suffix(i)); - tests.push(self.offset_both(i)); - } - tests - } - - fn offset_both(&self, off: usize) -> SearchTestOwned { - SearchTestOwned { - offset: off, - name: self.name.to_string(), - patterns: self.patterns.iter().map(|s| s.to_string()).collect(), - haystack: format!( - "{}{}{}", - "Z".repeat(off), - self.haystack, - "Z".repeat(off) - ), - matches: self - .matches - .iter() - .map(|&(id, s, e)| (id, s + off, e + off)) - .collect(), - } - } - - fn offset_prefix(&self, off: usize) -> SearchTestOwned { - SearchTestOwned { - offset: off, - name: self.name.to_string(), - patterns: self.patterns.iter().map(|s| s.to_string()).collect(), - haystack: format!("{}{}", "Z".repeat(off), self.haystack), - matches: self - .matches - .iter() - .map(|&(id, s, e)| (id, s + off, e + off)) - .collect(), - } - } - - fn offset_suffix(&self, off: usize) -> SearchTestOwned { - SearchTestOwned { - offset: off, - name: self.name.to_string(), - patterns: self.patterns.iter().map(|s| s.to_string()).collect(), - haystack: format!("{}{}", self.haystack, "Z".repeat(off)), - matches: self.matches.to_vec(), - } - } - - // fn to_owned(&self) -> SearchTestOwned { - // SearchTestOwned { - // name: self.name.to_string(), - // patterns: self.patterns.iter().map(|s| s.to_string()).collect(), - // haystack: self.haystack.to_string(), - // matches: self.matches.iter().cloned().collect(), - // } - // } -} - -/// Short-hand constructor for SearchTest. We use it a lot below. -macro_rules! t { - ($name:ident, $patterns:expr, $haystack:expr, $matches:expr) => { - SearchTest { - name: stringify!($name), - patterns: $patterns, - haystack: $haystack, - matches: $matches, - } - }; -} - -/// A collection of test groups. -type TestCollection = &'static [&'static [SearchTest]]; - -// Define several collections corresponding to the different type of match -// semantics supported. These collections have some overlap, but each -// collection should have some tests that no other collection has. - -/// Tests for leftmost-first match semantics. -const PACKED_LEFTMOST_FIRST: TestCollection = - &[BASICS, LEFTMOST, LEFTMOST_FIRST, REGRESSION, TEDDY]; - -/// Tests for leftmost-longest match semantics. -const PACKED_LEFTMOST_LONGEST: TestCollection = - &[BASICS, LEFTMOST, LEFTMOST_LONGEST, REGRESSION, TEDDY]; - -// Now define the individual tests that make up the collections above. - -/// A collection of tests for the that should always be true regardless of -/// match semantics. That is, all combinations of leftmost-{first, longest} -/// should produce the same answer. 
-const BASICS: &'static [SearchTest] = &[ - t!(basic001, &["a"], "", &[]), - t!(basic010, &["a"], "a", &[(0, 0, 1)]), - t!(basic020, &["a"], "aa", &[(0, 0, 1), (0, 1, 2)]), - t!(basic030, &["a"], "aaa", &[(0, 0, 1), (0, 1, 2), (0, 2, 3)]), - t!(basic040, &["a"], "aba", &[(0, 0, 1), (0, 2, 3)]), - t!(basic050, &["a"], "bba", &[(0, 2, 3)]), - t!(basic060, &["a"], "bbb", &[]), - t!(basic070, &["a"], "bababbbba", &[(0, 1, 2), (0, 3, 4), (0, 8, 9)]), - t!(basic100, &["aa"], "", &[]), - t!(basic110, &["aa"], "aa", &[(0, 0, 2)]), - t!(basic120, &["aa"], "aabbaa", &[(0, 0, 2), (0, 4, 6)]), - t!(basic130, &["aa"], "abbab", &[]), - t!(basic140, &["aa"], "abbabaa", &[(0, 5, 7)]), - t!(basic150, &["aaa"], "aaa", &[(0, 0, 3)]), - t!(basic200, &["abc"], "abc", &[(0, 0, 3)]), - t!(basic210, &["abc"], "zazabzabcz", &[(0, 6, 9)]), - t!(basic220, &["abc"], "zazabczabcz", &[(0, 3, 6), (0, 7, 10)]), - t!(basic300, &["a", "b"], "", &[]), - t!(basic310, &["a", "b"], "z", &[]), - t!(basic320, &["a", "b"], "b", &[(1, 0, 1)]), - t!(basic330, &["a", "b"], "a", &[(0, 0, 1)]), - t!( - basic340, - &["a", "b"], - "abba", - &[(0, 0, 1), (1, 1, 2), (1, 2, 3), (0, 3, 4),] - ), - t!( - basic350, - &["b", "a"], - "abba", - &[(1, 0, 1), (0, 1, 2), (0, 2, 3), (1, 3, 4),] - ), - t!(basic360, &["abc", "bc"], "xbc", &[(1, 1, 3),]), - t!(basic400, &["foo", "bar"], "", &[]), - t!(basic410, &["foo", "bar"], "foobar", &[(0, 0, 3), (1, 3, 6),]), - t!(basic420, &["foo", "bar"], "barfoo", &[(1, 0, 3), (0, 3, 6),]), - t!(basic430, &["foo", "bar"], "foofoo", &[(0, 0, 3), (0, 3, 6),]), - t!(basic440, &["foo", "bar"], "barbar", &[(1, 0, 3), (1, 3, 6),]), - t!(basic450, &["foo", "bar"], "bafofoo", &[(0, 4, 7),]), - t!(basic460, &["bar", "foo"], "bafofoo", &[(1, 4, 7),]), - t!(basic470, &["foo", "bar"], "fobabar", &[(1, 4, 7),]), - t!(basic480, &["bar", "foo"], "fobabar", &[(0, 4, 7),]), - t!(basic700, &["yabcdef", "abcdezghi"], "yabcdefghi", &[(0, 0, 7),]), - t!(basic710, &["yabcdef", "abcdezghi"], "yabcdezghi", &[(1, 1, 10),]), - t!( - basic720, - &["yabcdef", "bcdeyabc", "abcdezghi"], - "yabcdezghi", - &[(2, 1, 10),] - ), - t!(basic810, &["abcd", "bcd", "cd"], "abcd", &[(0, 0, 4),]), - t!(basic820, &["bcd", "cd", "abcd"], "abcd", &[(2, 0, 4),]), - t!(basic830, &["abc", "bc"], "zazabcz", &[(0, 3, 6),]), - t!( - basic840, - &["ab", "ba"], - "abababa", - &[(0, 0, 2), (0, 2, 4), (0, 4, 6),] - ), - t!(basic850, &["foo", "foo"], "foobarfoo", &[(0, 0, 3), (0, 6, 9),]), -]; - -/// Tests for leftmost match semantics. These should pass for both -/// leftmost-first and leftmost-longest match kinds. Stated differently, among -/// ambiguous matches, the longest match and the match that appeared first when -/// constructing the automaton should always be the same. 
-const LEFTMOST: &'static [SearchTest] = &[ - t!(leftmost000, &["ab", "ab"], "abcd", &[(0, 0, 2)]), - t!(leftmost030, &["a", "ab"], "aa", &[(0, 0, 1), (0, 1, 2)]), - t!(leftmost031, &["ab", "a"], "aa", &[(1, 0, 1), (1, 1, 2)]), - t!(leftmost032, &["ab", "a"], "xayabbbz", &[(1, 1, 2), (0, 3, 5)]), - t!(leftmost300, &["abcd", "bce", "b"], "abce", &[(1, 1, 4)]), - t!(leftmost310, &["abcd", "ce", "bc"], "abce", &[(2, 1, 3)]), - t!(leftmost320, &["abcd", "bce", "ce", "b"], "abce", &[(1, 1, 4)]), - t!(leftmost330, &["abcd", "bce", "cz", "bc"], "abcz", &[(3, 1, 3)]), - t!(leftmost340, &["bce", "cz", "bc"], "bcz", &[(2, 0, 2)]), - t!(leftmost350, &["abc", "bd", "ab"], "abd", &[(2, 0, 2)]), - t!( - leftmost360, - &["abcdefghi", "hz", "abcdefgh"], - "abcdefghz", - &[(2, 0, 8),] - ), - t!( - leftmost370, - &["abcdefghi", "cde", "hz", "abcdefgh"], - "abcdefghz", - &[(3, 0, 8),] - ), - t!( - leftmost380, - &["abcdefghi", "hz", "abcdefgh", "a"], - "abcdefghz", - &[(2, 0, 8),] - ), - t!( - leftmost390, - &["b", "abcdefghi", "hz", "abcdefgh"], - "abcdefghz", - &[(3, 0, 8),] - ), - t!( - leftmost400, - &["h", "abcdefghi", "hz", "abcdefgh"], - "abcdefghz", - &[(3, 0, 8),] - ), - t!( - leftmost410, - &["z", "abcdefghi", "hz", "abcdefgh"], - "abcdefghz", - &[(3, 0, 8), (0, 8, 9),] - ), -]; - -/// Tests for non-overlapping leftmost-first match semantics. These tests -/// should generally be specific to leftmost-first, which means they should -/// generally fail under leftmost-longest semantics. -const LEFTMOST_FIRST: &'static [SearchTest] = &[ - t!(leftfirst000, &["ab", "abcd"], "abcd", &[(0, 0, 2)]), - t!(leftfirst020, &["abcd", "ab"], "abcd", &[(0, 0, 4)]), - t!(leftfirst030, &["ab", "ab"], "abcd", &[(0, 0, 2)]), - t!(leftfirst040, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (0, 3, 4)]), - t!(leftfirst100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(1, 1, 5)]), - t!(leftfirst110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]), - t!(leftfirst300, &["abcd", "b", "bce"], "abce", &[(1, 1, 2)]), - t!( - leftfirst310, - &["abcd", "b", "bce", "ce"], - "abce", - &[(1, 1, 2), (3, 2, 4),] - ), - t!( - leftfirst320, - &["a", "abcdefghi", "hz", "abcdefgh"], - "abcdefghz", - &[(0, 0, 1), (2, 7, 9),] - ), - t!(leftfirst330, &["a", "abab"], "abab", &[(0, 0, 1), (0, 2, 3)]), - t!( - leftfirst340, - &["abcdef", "x", "x", "x", "x", "x", "x", "abcde"], - "abcdef", - &[(0, 0, 6)] - ), -]; - -/// Tests for non-overlapping leftmost-longest match semantics. These tests -/// should generally be specific to leftmost-longest, which means they should -/// generally fail under leftmost-first semantics. 
-const LEFTMOST_LONGEST: &'static [SearchTest] = &[ - t!(leftlong000, &["ab", "abcd"], "abcd", &[(1, 0, 4)]), - t!(leftlong010, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4),]), - t!(leftlong040, &["a", "ab"], "a", &[(0, 0, 1)]), - t!(leftlong050, &["a", "ab"], "ab", &[(1, 0, 2)]), - t!(leftlong060, &["ab", "a"], "a", &[(1, 0, 1)]), - t!(leftlong070, &["ab", "a"], "ab", &[(0, 0, 2)]), - t!(leftlong100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(2, 1, 6)]), - t!(leftlong110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]), - t!(leftlong300, &["abcd", "b", "bce"], "abce", &[(2, 1, 4)]), - t!( - leftlong310, - &["a", "abcdefghi", "hz", "abcdefgh"], - "abcdefghz", - &[(3, 0, 8),] - ), - t!(leftlong320, &["a", "abab"], "abab", &[(1, 0, 4)]), - t!(leftlong330, &["abcd", "b", "ce"], "abce", &[(1, 1, 2), (2, 2, 4),]), - t!(leftlong340, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (1, 3, 5)]), -]; - -/// Regression tests that are applied to all combinations. -/// -/// If regression tests are needed for specific match semantics, then add them -/// to the appropriate group above. -const REGRESSION: &'static [SearchTest] = &[ - t!(regression010, &["inf", "ind"], "infind", &[(0, 0, 3), (1, 3, 6),]), - t!(regression020, &["ind", "inf"], "infind", &[(1, 0, 3), (0, 3, 6),]), - t!( - regression030, - &["libcore/", "libstd/"], - "libcore/char/methods.rs", - &[(0, 0, 8),] - ), - t!( - regression040, - &["libstd/", "libcore/"], - "libcore/char/methods.rs", - &[(1, 0, 8),] - ), - t!( - regression050, - &["\x00\x00\x01", "\x00\x00\x00"], - "\x00\x00\x00", - &[(1, 0, 3),] - ), - t!( - regression060, - &["\x00\x00\x00", "\x00\x00\x01"], - "\x00\x00\x00", - &[(0, 0, 3),] - ), -]; - -const TEDDY: &'static [SearchTest] = &[ - t!( - teddy010, - &["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"], - "abcdefghijk", - &[ - (0, 0, 1), - (1, 1, 2), - (2, 2, 3), - (3, 3, 4), - (4, 4, 5), - (5, 5, 6), - (6, 6, 7), - (7, 7, 8), - (8, 8, 9), - (9, 9, 10), - (10, 10, 11) - ] - ), - t!( - teddy020, - &["ab", "bc", "cd", "de", "ef", "fg", "gh", "hi", "ij", "jk", "kl"], - "abcdefghijk", - &[(0, 0, 2), (2, 2, 4), (4, 4, 6), (6, 6, 8), (8, 8, 10),] - ), - t!( - teddy030, - &["abc"], - "abcdefghijklmnopqrstuvwxyzabcdefghijk", - &[(0, 0, 3), (0, 26, 29)] - ), -]; - -// Now define a test for each combination of things above that we want to run. -// Since there are a few different combinations for each collection of tests, -// we define a couple of macros to avoid repetition drudgery. The testconfig -// macro constructs the automaton from a given match kind, and runs the search -// tests one-by-one over the given collection. The `with` parameter allows one -// to configure the config with additional parameters. The testcombo macro -// invokes testconfig in precisely this way: it sets up several tests where -// each one turns a different knob on Config. - -macro_rules! 
testconfig { - ($name:ident, $collection:expr, $with:expr) => { - #[test] - fn $name() { - run_search_tests($collection, |test| { - let mut config = Config::new(); - $with(&mut config); - config - .builder() - .extend(test.patterns.iter().map(|p| p.as_bytes())) - .build() - .unwrap() - .find_iter(&test.haystack) - .collect() - }); - } - }; -} - -#[cfg(target_arch = "x86_64")] -testconfig!( - search_default_leftmost_first, - PACKED_LEFTMOST_FIRST, - |_: &mut Config| {} -); - -#[cfg(target_arch = "x86_64")] -testconfig!( - search_default_leftmost_longest, - PACKED_LEFTMOST_LONGEST, - |c: &mut Config| { - c.match_kind(MatchKind::LeftmostLongest); - } -); - -#[cfg(target_arch = "x86_64")] -testconfig!( - search_teddy_leftmost_first, - PACKED_LEFTMOST_FIRST, - |c: &mut Config| { - c.force_teddy(true); - } -); - -#[cfg(target_arch = "x86_64")] -testconfig!( - search_teddy_leftmost_longest, - PACKED_LEFTMOST_LONGEST, - |c: &mut Config| { - c.force_teddy(true).match_kind(MatchKind::LeftmostLongest); - } -); - -#[cfg(target_arch = "x86_64")] -testconfig!( - search_teddy_ssse3_leftmost_first, - PACKED_LEFTMOST_FIRST, - |c: &mut Config| { - c.force_teddy(true); - if is_x86_feature_detected!("ssse3") { - c.force_avx(Some(false)); - } - } -); - -#[cfg(target_arch = "x86_64")] -testconfig!( - search_teddy_ssse3_leftmost_longest, - PACKED_LEFTMOST_LONGEST, - |c: &mut Config| { - c.force_teddy(true).match_kind(MatchKind::LeftmostLongest); - if is_x86_feature_detected!("ssse3") { - c.force_avx(Some(false)); - } - } -); - -#[cfg(target_arch = "x86_64")] -testconfig!( - search_teddy_avx2_leftmost_first, - PACKED_LEFTMOST_FIRST, - |c: &mut Config| { - c.force_teddy(true); - if is_x86_feature_detected!("avx2") { - c.force_avx(Some(true)); - } - } -); - -#[cfg(target_arch = "x86_64")] -testconfig!( - search_teddy_avx2_leftmost_longest, - PACKED_LEFTMOST_LONGEST, - |c: &mut Config| { - c.force_teddy(true).match_kind(MatchKind::LeftmostLongest); - if is_x86_feature_detected!("avx2") { - c.force_avx(Some(true)); - } - } -); - -#[cfg(target_arch = "x86_64")] -testconfig!( - search_teddy_fat_leftmost_first, - PACKED_LEFTMOST_FIRST, - |c: &mut Config| { - c.force_teddy(true); - if is_x86_feature_detected!("avx2") { - c.force_teddy_fat(Some(true)); - } - } -); - -#[cfg(target_arch = "x86_64")] -testconfig!( - search_teddy_fat_leftmost_longest, - PACKED_LEFTMOST_LONGEST, - |c: &mut Config| { - c.force_teddy(true).match_kind(MatchKind::LeftmostLongest); - if is_x86_feature_detected!("avx2") { - c.force_teddy_fat(Some(true)); - } - } -); - -testconfig!( - search_rabinkarp_leftmost_first, - PACKED_LEFTMOST_FIRST, - |c: &mut Config| { - c.force_rabin_karp(true); - } -); - -testconfig!( - search_rabinkarp_leftmost_longest, - PACKED_LEFTMOST_LONGEST, - |c: &mut Config| { - c.force_rabin_karp(true).match_kind(MatchKind::LeftmostLongest); - } -); - -#[test] -fn search_tests_have_unique_names() { - let assert = |constname, tests: &[SearchTest]| { - let mut seen = HashMap::new(); // map from test name to position - for (i, test) in tests.iter().enumerate() { - if !seen.contains_key(test.name) { - seen.insert(test.name, i); - } else { - let last = seen[test.name]; - panic!( - "{} tests have duplicate names at positions {} and {}", - constname, last, i - ); - } - } - }; - assert("BASICS", BASICS); - assert("LEFTMOST", LEFTMOST); - assert("LEFTMOST_FIRST", LEFTMOST_FIRST); - assert("LEFTMOST_LONGEST", LEFTMOST_LONGEST); - assert("REGRESSION", REGRESSION); - assert("TEDDY", TEDDY); -} - -fn run_search_tests Vec>( - which: 
TestCollection,
-    mut f: F,
-) {
-    let get_match_triples =
-        |matches: Vec<Match>| -> Vec<(usize, usize, usize)> {
-            matches
-                .into_iter()
-                .map(|m| (m.pattern(), m.start(), m.end()))
-                .collect()
-        };
-    for &tests in which {
-        for spec in tests {
-            for test in spec.variations() {
-                assert_eq!(
-                    test.matches,
-                    get_match_triples(f(&test)).as_slice(),
-                    "test: {}, patterns: {:?}, haystack: {:?}, offset: {:?}",
-                    test.name,
-                    test.patterns,
-                    test.haystack,
-                    test.offset,
-                );
-            }
-        }
-    }
-}
diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/vector.rs libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/vector.rs
--- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/vector.rs 2022-11-23 18:55:06.000000000 +0000
+++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/vector.rs 1970-01-01 00:00:00.000000000 +0000
@@ -1,181 +0,0 @@
-// This file contains a set of fairly generic utility functions when working
-// with SIMD vectors.
-//
-// SAFETY: All of the routines below are unsafe to call because they assume
-// the necessary CPU target features in order to use particular vendor
-// intrinsics. Calling these routines when the underlying CPU does not support
-// the appropriate target features is NOT safe. Callers must ensure this
-// themselves.
-//
-// Note that it may not look like this safety invariant is being upheld when
-// these routines are called. Namely, the CPU feature check is typically pretty
-// far away from when these routines are used. Instead, we rely on the fact
-// that certain types serve as a guaranteed receipt that pertinent target
-// features are enabled. For example, the only way TeddySlim3Mask256 can be
-// constructed is if the AVX2 CPU feature is available. Thus, any code running
-// inside of TeddySlim3Mask256 can use any of the functions below without any
-// additional checks: its very existence *is* the check.
-
-use std::arch::x86_64::*;
-
-/// Shift `a` to the left by two bytes (removing its two most significant
-/// bytes), and concatenate it with the two most significant bytes of `b`.
-#[target_feature(enable = "avx2")]
-pub unsafe fn alignr256_14(a: __m256i, b: __m256i) -> __m256i {
-    // Credit goes to jneem for figuring this out:
-    // https://github.com/jneem/teddy/blob/9ab5e899ad6ef6911aecd3cf1033f1abe6e1f66c/src/x86/teddy_simd.rs#L145-L184
-    //
-    // TL;DR avx2's PALIGNR instruction is actually just two 128-bit PALIGNR
-    // instructions, which is not what we want, so we need to do some extra
-    // shuffling.
-
-    // This permute gives us the low 16 bytes of a concatenated with the high
-    // 16 bytes of b, in order of most significant to least significant. So
-    // `v = a[15:0] b[31:16]`.
- let v = _mm256_permute2x128_si256(b, a, 0x21); - // This effectively does this (where we deal in terms of byte-indexing - // and byte-shifting, and use inclusive ranges): - // - // ret[15:0] := ((a[15:0] << 16) | v[15:0]) >> 14 - // = ((a[15:0] << 16) | b[31:16]) >> 14 - // ret[31:16] := ((a[31:16] << 16) | v[31:16]) >> 14 - // = ((a[31:16] << 16) | a[15:0]) >> 14 - // - // Which therefore results in: - // - // ret[31:0] := a[29:16] a[15:14] a[13:0] b[31:30] - // - // The end result is that we've effectively done this: - // - // (a << 2) | (b >> 30) - // - // When `A` and `B` are strings---where the beginning of the string is in - // the least significant bits---we effectively result in the following - // semantic operation: - // - // (A >> 2) | (B << 30) - // - // The reversal being attributed to the fact that we are in little-endian. - _mm256_alignr_epi8(a, v, 14) -} - -/// Shift `a` to the left by one byte (removing its most significant byte), and -/// concatenate it with the the most significant byte of `b`. -#[target_feature(enable = "avx2")] -pub unsafe fn alignr256_15(a: __m256i, b: __m256i) -> __m256i { - // For explanation, see alignr256_14. - let v = _mm256_permute2x128_si256(b, a, 0x21); - _mm256_alignr_epi8(a, v, 15) -} - -/// Unpack the given 128-bit vector into its 64-bit components. The first -/// element of the array returned corresponds to the least significant 64-bit -/// lane in `a`. -#[target_feature(enable = "ssse3")] -pub unsafe fn unpack64x128(a: __m128i) -> [u64; 2] { - [ - _mm_cvtsi128_si64(a) as u64, - _mm_cvtsi128_si64(_mm_srli_si128(a, 8)) as u64, - ] -} - -/// Unpack the given 256-bit vector into its 64-bit components. The first -/// element of the array returned corresponds to the least significant 64-bit -/// lane in `a`. -#[target_feature(enable = "avx2")] -pub unsafe fn unpack64x256(a: __m256i) -> [u64; 4] { - // Using transmute here is precisely equivalent, but actually slower. It's - // not quite clear why. - let lo = _mm256_extracti128_si256(a, 0); - let hi = _mm256_extracti128_si256(a, 1); - [ - _mm_cvtsi128_si64(lo) as u64, - _mm_cvtsi128_si64(_mm_srli_si128(lo, 8)) as u64, - _mm_cvtsi128_si64(hi) as u64, - _mm_cvtsi128_si64(_mm_srli_si128(hi, 8)) as u64, - ] -} - -/// Unpack the low 128-bits of `a` and `b`, and return them as 4 64-bit -/// integers. -/// -/// More precisely, if a = a4 a3 a2 a1 and b = b4 b3 b2 b1, where each element -/// is a 64-bit integer and a1/b1 correspond to the least significant 64 bits, -/// then the return value is `b2 b1 a2 a1`. -#[target_feature(enable = "avx2")] -pub unsafe fn unpacklo64x256(a: __m256i, b: __m256i) -> [u64; 4] { - let lo = _mm256_castsi256_si128(a); - let hi = _mm256_castsi256_si128(b); - [ - _mm_cvtsi128_si64(lo) as u64, - _mm_cvtsi128_si64(_mm_srli_si128(lo, 8)) as u64, - _mm_cvtsi128_si64(hi) as u64, - _mm_cvtsi128_si64(_mm_srli_si128(hi, 8)) as u64, - ] -} - -/// Returns true if and only if all bits in the given 128-bit vector are 0. -#[target_feature(enable = "ssse3")] -pub unsafe fn is_all_zeroes128(a: __m128i) -> bool { - let cmp = _mm_cmpeq_epi8(a, zeroes128()); - _mm_movemask_epi8(cmp) as u32 == 0xFFFF -} - -/// Returns true if and only if all bits in the given 256-bit vector are 0. -#[target_feature(enable = "avx2")] -pub unsafe fn is_all_zeroes256(a: __m256i) -> bool { - let cmp = _mm256_cmpeq_epi8(a, zeroes256()); - _mm256_movemask_epi8(cmp) as u32 == 0xFFFFFFFF -} - -/// Load a 128-bit vector from slice at the given position. The slice does -/// not need to be unaligned. 
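The byte gymnastics in `alignr256_14` above can be checked against a scalar model. A sketch, viewing each vector as 32 little-endian bytes (byte 0 least significant):

// Per the ret[...] derivation above: the two bytes carried in from `b`
// are its most significant pair, and everything else is `a` shifted up
// by two byte lanes.
fn alignr_14_scalar(a: &[u8; 32], b: &[u8; 32]) -> [u8; 32] {
    let mut out = [0u8; 32];
    out[..2].copy_from_slice(&b[30..]); // ret[1:0] = b[31:30]
    out[2..].copy_from_slice(&a[..30]); // ret[31:2] = a[29:0]
    out
}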
-/// -/// Since this code assumes little-endian (there is no big-endian x86), the -/// bytes starting in `slice[at..]` will be at the least significant bits of -/// the returned vector. This is important for the surrounding code, since for -/// example, shifting the resulting vector right is equivalent to logically -/// shifting the bytes in `slice` left. -#[target_feature(enable = "sse2")] -pub unsafe fn loadu128(slice: &[u8], at: usize) -> __m128i { - let ptr = slice.get_unchecked(at..).as_ptr(); - _mm_loadu_si128(ptr as *const u8 as *const __m128i) -} - -/// Load a 256-bit vector from slice at the given position. The slice does -/// not need to be unaligned. -/// -/// Since this code assumes little-endian (there is no big-endian x86), the -/// bytes starting in `slice[at..]` will be at the least significant bits of -/// the returned vector. This is important for the surrounding code, since for -/// example, shifting the resulting vector right is equivalent to logically -/// shifting the bytes in `slice` left. -#[target_feature(enable = "avx2")] -pub unsafe fn loadu256(slice: &[u8], at: usize) -> __m256i { - let ptr = slice.get_unchecked(at..).as_ptr(); - _mm256_loadu_si256(ptr as *const u8 as *const __m256i) -} - -/// Returns a 128-bit vector with all bits set to 0. -#[target_feature(enable = "sse2")] -pub unsafe fn zeroes128() -> __m128i { - _mm_set1_epi8(0) -} - -/// Returns a 256-bit vector with all bits set to 0. -#[target_feature(enable = "avx2")] -pub unsafe fn zeroes256() -> __m256i { - _mm256_set1_epi8(0) -} - -/// Returns a 128-bit vector with all bits set to 1. -#[target_feature(enable = "sse2")] -pub unsafe fn ones128() -> __m128i { - _mm_set1_epi8(0xFF as u8 as i8) -} - -/// Returns a 256-bit vector with all bits set to 1. -#[target_feature(enable = "avx2")] -pub unsafe fn ones256() -> __m256i { - _mm256_set1_epi8(0xFF as u8 as i8) -} diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/prefilter.rs libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/prefilter.rs --- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/prefilter.rs 2022-11-23 18:55:06.000000000 +0000 +++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/prefilter.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,1057 +0,0 @@ -use std::cmp; -use std::fmt; -use std::panic::{RefUnwindSafe, UnwindSafe}; -use std::u8; - -use memchr::{memchr, memchr2, memchr3}; - -use crate::ahocorasick::MatchKind; -use crate::packed; -use crate::Match; - -/// A candidate is the result of running a prefilter on a haystack at a -/// particular position. The result is either no match, a confirmed match or -/// a possible match. -/// -/// When no match is returned, the prefilter is guaranteeing that no possible -/// match can be found in the haystack, and the caller may trust this. That is, -/// all correct prefilters must never report false negatives. -/// -/// In some cases, a prefilter can confirm a match very quickly, in which case, -/// the caller may use this to stop what it's doing and report the match. In -/// this case, prefilter implementations must never report a false positive. -/// In other cases, the prefilter can only report a potential match, in which -/// case the callers must attempt to confirm the match. In this case, prefilter -/// implementations are permitted to return false positives. -#[derive(Clone, Debug)] -pub enum Candidate { - None, - Match(Match), - PossibleStartOfMatch(usize), -} - -impl Candidate { - /// Convert this candidate into an option. 
This is useful when callers - /// do not distinguish between true positives and false positives (i.e., - /// the caller must always confirm the match in order to update some other - /// state). - pub fn into_option(self) -> Option { - match self { - Candidate::None => None, - Candidate::Match(ref m) => Some(m.start()), - Candidate::PossibleStartOfMatch(start) => Some(start), - } - } -} - -/// A prefilter describes the behavior of fast literal scanners for quickly -/// skipping past bytes in the haystack that we know cannot possibly -/// participate in a match. -pub trait Prefilter: - Send + Sync + RefUnwindSafe + UnwindSafe + fmt::Debug -{ - /// Returns the next possible match candidate. This may yield false - /// positives, so callers must confirm a match starting at the position - /// returned. This, however, must never produce false negatives. That is, - /// this must, at minimum, return the starting position of the next match - /// in the given haystack after or at the given position. - fn next_candidate( - &self, - state: &mut PrefilterState, - haystack: &[u8], - at: usize, - ) -> Candidate; - - /// A method for cloning a prefilter, to work-around the fact that Clone - /// is not object-safe. - fn clone_prefilter(&self) -> Box; - - /// Returns the approximate total amount of heap used by this prefilter, in - /// units of bytes. - fn heap_bytes(&self) -> usize; - - /// Returns true if and only if this prefilter never returns false - /// positives. This is useful for completely avoiding the automaton - /// when the prefilter can quickly confirm its own matches. - /// - /// By default, this returns true, which is conservative; it is always - /// correct to return `true`. Returning `false` here and reporting a false - /// positive will result in incorrect searches. - fn reports_false_positives(&self) -> bool { - true - } - - /// Returns true if and only if this prefilter may look for a non-starting - /// position of a match. - /// - /// This is useful in a streaming context where prefilters that don't look - /// for a starting position of a match can be quite difficult to deal with. - /// - /// This returns false by default. - fn looks_for_non_start_of_match(&self) -> bool { - false - } -} - -impl<'a, P: Prefilter + ?Sized> Prefilter for &'a P { - #[inline] - fn next_candidate( - &self, - state: &mut PrefilterState, - haystack: &[u8], - at: usize, - ) -> Candidate { - (**self).next_candidate(state, haystack, at) - } - - fn clone_prefilter(&self) -> Box { - (**self).clone_prefilter() - } - - fn heap_bytes(&self) -> usize { - (**self).heap_bytes() - } - - fn reports_false_positives(&self) -> bool { - (**self).reports_false_positives() - } -} - -/// A convenience object for representing any type that implements Prefilter -/// and is cloneable. -#[derive(Debug)] -pub struct PrefilterObj(Box); - -impl Clone for PrefilterObj { - fn clone(&self) -> Self { - PrefilterObj(self.0.clone_prefilter()) - } -} - -impl PrefilterObj { - /// Create a new prefilter object. - pub fn new(t: T) -> PrefilterObj { - PrefilterObj(Box::new(t)) - } - - /// Return the underlying prefilter trait object. - pub fn as_ref(&self) -> &dyn Prefilter { - &*self.0 - } -} - -/// PrefilterState tracks state associated with the effectiveness of a -/// prefilter. It is used to track how many bytes, on average, are skipped by -/// the prefilter. If this average dips below a certain threshold over time, -/// then the state renders the prefilter inert and stops using it. 
-/// -/// A prefilter state should be created for each search. (Where creating an -/// iterator via, e.g., `find_iter`, is treated as a single search.) -#[derive(Clone, Debug)] -pub struct PrefilterState { - /// The number of skips that has been executed. - skips: usize, - /// The total number of bytes that have been skipped. - skipped: usize, - /// The maximum length of a match. This is used to help determine how many - /// bytes on average should be skipped in order for a prefilter to be - /// effective. - max_match_len: usize, - /// Once this heuristic has been deemed permanently ineffective, it will be - /// inert throughout the rest of its lifetime. This serves as a cheap way - /// to check inertness. - inert: bool, - /// The last (absolute) position at which a prefilter scanned to. - /// Prefilters can use this position to determine whether to re-scan or - /// not. - /// - /// Unlike other things that impact effectiveness, this is a fleeting - /// condition. That is, a prefilter can be considered ineffective if it is - /// at a position before `last_scan_at`, but can become effective again - /// once the search moves past `last_scan_at`. - /// - /// The utility of this is to both avoid additional overhead from calling - /// the prefilter and to avoid quadratic behavior. This ensures that a - /// prefilter will scan any particular byte at most once. (Note that some - /// prefilters, like the start-byte prefilter, do not need to use this - /// field at all, since it only looks for starting bytes.) - last_scan_at: usize, -} - -impl PrefilterState { - /// The minimum number of skip attempts to try before considering whether - /// a prefilter is effective or not. - const MIN_SKIPS: usize = 40; - - /// The minimum amount of bytes that skipping must average, expressed as a - /// factor of the multiple of the length of a possible match. - /// - /// That is, after MIN_SKIPS have occurred, if the average number of bytes - /// skipped ever falls below MIN_AVG_FACTOR * max-match-length, then the - /// prefilter outed to be rendered inert. - const MIN_AVG_FACTOR: usize = 2; - - /// Create a fresh prefilter state. - pub fn new(max_match_len: usize) -> PrefilterState { - PrefilterState { - skips: 0, - skipped: 0, - max_match_len, - inert: false, - last_scan_at: 0, - } - } - - /// Create a prefilter state that always disables the prefilter. - pub fn disabled() -> PrefilterState { - PrefilterState { - skips: 0, - skipped: 0, - max_match_len: 0, - inert: true, - last_scan_at: 0, - } - } - - /// Update this state with the number of bytes skipped on the last - /// invocation of the prefilter. - #[inline] - fn update_skipped_bytes(&mut self, skipped: usize) { - self.skips += 1; - self.skipped += skipped; - } - - /// Updates the position at which the last scan stopped. This may be - /// greater than the position of the last candidate reported. For example, - /// searching for the "rare" byte `z` in `abczdef` for the pattern `abcz` - /// will report a candidate at position `0`, but the end of its last scan - /// will be at position `3`. - /// - /// This position factors into the effectiveness of this prefilter. If the - /// current position is less than the last position at which a scan ended, - /// then the prefilter should not be re-run until the search moves past - /// that position. - #[inline] - fn update_at(&mut self, at: usize) { - if at > self.last_scan_at { - self.last_scan_at = at; - } - } - - /// Return true if and only if this state indicates that a prefilter is - /// still effective. 
- /// - /// The given pos should correspond to the current starting position of the - /// search. - #[inline] - pub fn is_effective(&mut self, at: usize) -> bool { - if self.inert { - return false; - } - if at < self.last_scan_at { - return false; - } - if self.skips < PrefilterState::MIN_SKIPS { - return true; - } - - let min_avg = PrefilterState::MIN_AVG_FACTOR * self.max_match_len; - if self.skipped >= min_avg * self.skips { - return true; - } - - // We're inert. - self.inert = true; - false - } -} - -/// A builder for constructing the best possible prefilter. When constructed, -/// this builder will heuristically select the best prefilter it can build, -/// if any, and discard the rest. -#[derive(Debug)] -pub struct Builder { - count: usize, - ascii_case_insensitive: bool, - start_bytes: StartBytesBuilder, - rare_bytes: RareBytesBuilder, - packed: Option, -} - -impl Builder { - /// Create a new builder for constructing the best possible prefilter. - pub fn new(kind: MatchKind) -> Builder { - let pbuilder = kind - .as_packed() - .map(|kind| packed::Config::new().match_kind(kind).builder()); - Builder { - count: 0, - ascii_case_insensitive: false, - start_bytes: StartBytesBuilder::new(), - rare_bytes: RareBytesBuilder::new(), - packed: pbuilder, - } - } - - /// Enable ASCII case insensitivity. When set, byte strings added to this - /// builder will be interpreted without respect to ASCII case. - pub fn ascii_case_insensitive(mut self, yes: bool) -> Builder { - self.ascii_case_insensitive = yes; - self.start_bytes = self.start_bytes.ascii_case_insensitive(yes); - self.rare_bytes = self.rare_bytes.ascii_case_insensitive(yes); - self - } - - /// Return a prefilter suitable for quickly finding potential matches. - /// - /// All patterns added to an Aho-Corasick automaton should be added to this - /// builder before attempting to construct the prefilter. - pub fn build(&self) -> Option { - // match (self.start_bytes.build(), self.rare_bytes.build()) { - match (self.start_bytes.build(), self.rare_bytes.build()) { - // If we could build both start and rare prefilters, then there are - // a few cases in which we'd want to use the start-byte prefilter - // over the rare-byte prefilter, since the former has lower - // overhead. - (prestart @ Some(_), prerare @ Some(_)) => { - // If the start-byte prefilter can scan for a smaller number - // of bytes than the rare-byte prefilter, then it's probably - // faster. - let has_fewer_bytes = - self.start_bytes.count < self.rare_bytes.count; - // Otherwise, if the combined frequency rank of the detected - // bytes in the start-byte prefilter is "close" to the combined - // frequency rank of the rare-byte prefilter, then we pick - // the start-byte prefilter even if the rare-byte prefilter - // heuristically searches for rare bytes. This is because the - // rare-byte prefilter has higher constant costs, so we tend to - // prefer the start-byte prefilter when we can. - let has_rarer_bytes = - self.start_bytes.rank_sum <= self.rare_bytes.rank_sum + 50; - if has_fewer_bytes || has_rarer_bytes { - prestart - } else { - prerare - } - } - (prestart @ Some(_), None) => prestart, - (None, prerare @ Some(_)) => prerare, - (None, None) if self.ascii_case_insensitive => None, - (None, None) => self - .packed - .as_ref() - .and_then(|b| b.build()) - .map(|s| PrefilterObj::new(Packed(s))), - } - } - - /// Add a literal string to this prefilter builder. 
- pub fn add(&mut self, bytes: &[u8]) { - self.count += 1; - self.start_bytes.add(bytes); - self.rare_bytes.add(bytes); - if let Some(ref mut pbuilder) = self.packed { - pbuilder.add(bytes); - } - } -} - -/// A type that wraps a packed searcher and implements the `Prefilter` -/// interface. -#[derive(Clone, Debug)] -struct Packed(packed::Searcher); - -impl Prefilter for Packed { - fn next_candidate( - &self, - _state: &mut PrefilterState, - haystack: &[u8], - at: usize, - ) -> Candidate { - self.0.find_at(haystack, at).map_or(Candidate::None, Candidate::Match) - } - - fn clone_prefilter(&self) -> Box { - Box::new(self.clone()) - } - - fn heap_bytes(&self) -> usize { - self.0.heap_bytes() - } - - fn reports_false_positives(&self) -> bool { - false - } -} - -/// A builder for constructing a rare byte prefilter. -/// -/// A rare byte prefilter attempts to pick out a small set of rare bytes that -/// occurr in the patterns, and then quickly scan to matches of those rare -/// bytes. -#[derive(Clone, Debug)] -struct RareBytesBuilder { - /// Whether this prefilter should account for ASCII case insensitivity or - /// not. - ascii_case_insensitive: bool, - /// A set of rare bytes, indexed by byte value. - rare_set: ByteSet, - /// A set of byte offsets associated with bytes in a pattern. An entry - /// corresponds to a particular bytes (its index) and is only non-zero if - /// the byte occurred at an offset greater than 0 in at least one pattern. - /// - /// If a byte's offset is not representable in 8 bits, then the rare bytes - /// prefilter becomes inert. - byte_offsets: RareByteOffsets, - /// Whether this is available as a prefilter or not. This can be set to - /// false during construction if a condition is seen that invalidates the - /// use of the rare-byte prefilter. - available: bool, - /// The number of bytes set to an active value in `byte_offsets`. - count: usize, - /// The sum of frequency ranks for the rare bytes detected. This is - /// intended to give a heuristic notion of how rare the bytes are. - rank_sum: u16, -} - -/// A set of bytes. -#[derive(Clone, Copy)] -struct ByteSet([bool; 256]); - -impl ByteSet { - fn empty() -> ByteSet { - ByteSet([false; 256]) - } - - fn insert(&mut self, b: u8) -> bool { - let new = !self.contains(b); - self.0[b as usize] = true; - new - } - - fn contains(&self, b: u8) -> bool { - self.0[b as usize] - } -} - -impl fmt::Debug for ByteSet { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let mut bytes = vec![]; - for b in 0..=255 { - if self.contains(b) { - bytes.push(b); - } - } - f.debug_struct("ByteSet").field("set", &bytes).finish() - } -} - -/// A set of byte offsets, keyed by byte. -#[derive(Clone, Copy)] -struct RareByteOffsets { - /// Each entry corresponds to the maximum offset of the corresponding - /// byte across all patterns seen. - set: [RareByteOffset; 256], -} - -impl RareByteOffsets { - /// Create a new empty set of rare byte offsets. - pub fn empty() -> RareByteOffsets { - RareByteOffsets { set: [RareByteOffset::default(); 256] } - } - - /// Add the given offset for the given byte to this set. If the offset is - /// greater than the existing offset, then it overwrites the previous - /// value and returns false. If there is no previous value set, then this - /// sets it and returns true. 
- pub fn set(&mut self, byte: u8, off: RareByteOffset) { - self.set[byte as usize].max = - cmp::max(self.set[byte as usize].max, off.max); - } -} - -impl fmt::Debug for RareByteOffsets { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let mut offsets = vec![]; - for off in self.set.iter() { - if off.max > 0 { - offsets.push(off); - } - } - f.debug_struct("RareByteOffsets").field("set", &offsets).finish() - } -} - -/// Offsets associated with an occurrence of a "rare" byte in any of the -/// patterns used to construct a single Aho-Corasick automaton. -#[derive(Clone, Copy, Debug)] -struct RareByteOffset { - /// The maximum offset at which a particular byte occurs from the start - /// of any pattern. This is used as a shift amount. That is, when an - /// occurrence of this byte is found, the candidate position reported by - /// the prefilter is `position_of_byte - max`, such that the automaton - /// will begin its search at a position that is guaranteed to observe a - /// match. - /// - /// To avoid accidentally quadratic behavior, a prefilter is considered - /// ineffective when it is asked to start scanning from a position that it - /// has already scanned past. - /// - /// Using a `u8` here means that if we ever see a pattern that's longer - /// than 255 bytes, then the entire rare byte prefilter is disabled. - max: u8, -} - -impl Default for RareByteOffset { - fn default() -> RareByteOffset { - RareByteOffset { max: 0 } - } -} - -impl RareByteOffset { - /// Create a new rare byte offset. If the given offset is too big, then - /// None is returned. In that case, callers should render the rare bytes - /// prefilter inert. - fn new(max: usize) -> Option { - if max > u8::MAX as usize { - None - } else { - Some(RareByteOffset { max: max as u8 }) - } - } -} - -impl RareBytesBuilder { - /// Create a new builder for constructing a rare byte prefilter. - fn new() -> RareBytesBuilder { - RareBytesBuilder { - ascii_case_insensitive: false, - rare_set: ByteSet::empty(), - byte_offsets: RareByteOffsets::empty(), - available: true, - count: 0, - rank_sum: 0, - } - } - - /// Enable ASCII case insensitivity. When set, byte strings added to this - /// builder will be interpreted without respect to ASCII case. - fn ascii_case_insensitive(mut self, yes: bool) -> RareBytesBuilder { - self.ascii_case_insensitive = yes; - self - } - - /// Build the rare bytes prefilter. - /// - /// If there are more than 3 distinct starting bytes, or if heuristics - /// otherwise determine that this prefilter should not be used, then `None` - /// is returned. - fn build(&self) -> Option { - if !self.available || self.count > 3 { - return None; - } - let (mut bytes, mut len) = ([0; 3], 0); - for b in 0..=255 { - if self.rare_set.contains(b) { - bytes[len] = b as u8; - len += 1; - } - } - match len { - 0 => None, - 1 => Some(PrefilterObj::new(RareBytesOne { - byte1: bytes[0], - offset: self.byte_offsets.set[bytes[0] as usize], - })), - 2 => Some(PrefilterObj::new(RareBytesTwo { - offsets: self.byte_offsets, - byte1: bytes[0], - byte2: bytes[1], - })), - 3 => Some(PrefilterObj::new(RareBytesThree { - offsets: self.byte_offsets, - byte1: bytes[0], - byte2: bytes[1], - byte3: bytes[2], - })), - _ => unreachable!(), - } - } - - /// Add a byte string to this builder. - /// - /// All patterns added to an Aho-Corasick automaton should be added to this - /// builder before attempting to construct the prefilter. - fn add(&mut self, bytes: &[u8]) { - // If we've already given up, then do nothing. 
- if !self.available { - return; - } - // If we've already blown our budget, then don't waste time looking - // for more rare bytes. - if self.count > 3 { - self.available = false; - return; - } - // If the pattern is too long, then our offset table is bunk, so - // give up. - if bytes.len() >= 256 { - self.available = false; - return; - } - let mut rarest = match bytes.get(0) { - None => return, - Some(&b) => (b, freq_rank(b)), - }; - // The idea here is to look for the rarest byte in each pattern, and - // add that to our set. As a special exception, if we see a byte that - // we've already added, then we immediately stop and choose that byte, - // even if there's another rare byte in the pattern. This helps us - // apply the rare byte optimization in more cases by attempting to pick - // bytes that are in common between patterns. So for example, if we - // were searching for `Sherlock` and `lockjaw`, then this would pick - // `k` for both patterns, resulting in the use of `memchr` instead of - // `memchr2` for `k` and `j`. - let mut found = false; - for (pos, &b) in bytes.iter().enumerate() { - self.set_offset(pos, b); - if found { - continue; - } - if self.rare_set.contains(b) { - found = true; - continue; - } - let rank = freq_rank(b); - if rank < rarest.1 { - rarest = (b, rank); - } - } - if !found { - self.add_rare_byte(rarest.0); - } - } - - fn set_offset(&mut self, pos: usize, byte: u8) { - // This unwrap is OK because pos is never bigger than our max. - let offset = RareByteOffset::new(pos).unwrap(); - self.byte_offsets.set(byte, offset); - if self.ascii_case_insensitive { - self.byte_offsets.set(opposite_ascii_case(byte), offset); - } - } - - fn add_rare_byte(&mut self, byte: u8) { - self.add_one_rare_byte(byte); - if self.ascii_case_insensitive { - self.add_one_rare_byte(opposite_ascii_case(byte)); - } - } - - fn add_one_rare_byte(&mut self, byte: u8) { - if self.rare_set.insert(byte) { - self.count += 1; - self.rank_sum += freq_rank(byte) as u16; - } - } -} - -/// A prefilter for scanning for a single "rare" byte. -#[derive(Clone, Debug)] -struct RareBytesOne { - byte1: u8, - offset: RareByteOffset, -} - -impl Prefilter for RareBytesOne { - fn next_candidate( - &self, - state: &mut PrefilterState, - haystack: &[u8], - at: usize, - ) -> Candidate { - memchr(self.byte1, &haystack[at..]) - .map(|i| { - let pos = at + i; - state.last_scan_at = pos; - cmp::max(at, pos.saturating_sub(self.offset.max as usize)) - }) - .map_or(Candidate::None, Candidate::PossibleStartOfMatch) - } - - fn clone_prefilter(&self) -> Box { - Box::new(self.clone()) - } - - fn heap_bytes(&self) -> usize { - 0 - } - - fn looks_for_non_start_of_match(&self) -> bool { - // TODO: It should be possible to use a rare byte prefilter in a - // streaming context. The main problem is that we usually assume that - // if a prefilter has scanned some text and not found anything, then no - // match *starts* in that text. This doesn't matter in non-streaming - // contexts, but in a streaming context, if we're looking for a byte - // that doesn't start at the beginning of a match and don't find it, - // then it's still possible for a match to start at the end of the - // current buffer content. In order to fix this, the streaming searcher - // would need to become aware of prefilters that do this and use the - // appropriate offset in various places. It is quite a delicate change - // and probably shouldn't be attempted until streaming search has a - // better testing strategy. 
In particular, we'd really like to be able - // to vary the buffer size to force strange cases that occur at the - // edge of the buffer. If we make the buffer size minimal, then these - // cases occur more frequently and easier. - // - // This is also a bummer because this means that if the prefilter - // builder chose a rare byte prefilter, then a streaming search won't - // use any prefilter at all because the builder doesn't know how it's - // going to be used. Assuming we don't make streaming search aware of - // these special types of prefilters as described above, we could fix - // this by building a "backup" prefilter that could be used when the - // rare byte prefilter could not. But that's a bandaide. Sigh. - true - } -} - -/// A prefilter for scanning for two "rare" bytes. -#[derive(Clone, Debug)] -struct RareBytesTwo { - offsets: RareByteOffsets, - byte1: u8, - byte2: u8, -} - -impl Prefilter for RareBytesTwo { - fn next_candidate( - &self, - state: &mut PrefilterState, - haystack: &[u8], - at: usize, - ) -> Candidate { - memchr2(self.byte1, self.byte2, &haystack[at..]) - .map(|i| { - let pos = at + i; - state.update_at(pos); - let offset = self.offsets.set[haystack[pos] as usize].max; - cmp::max(at, pos.saturating_sub(offset as usize)) - }) - .map_or(Candidate::None, Candidate::PossibleStartOfMatch) - } - - fn clone_prefilter(&self) -> Box { - Box::new(self.clone()) - } - - fn heap_bytes(&self) -> usize { - 0 - } - - fn looks_for_non_start_of_match(&self) -> bool { - // TODO: See Prefilter impl for RareBytesOne. - true - } -} - -/// A prefilter for scanning for three "rare" bytes. -#[derive(Clone, Debug)] -struct RareBytesThree { - offsets: RareByteOffsets, - byte1: u8, - byte2: u8, - byte3: u8, -} - -impl Prefilter for RareBytesThree { - fn next_candidate( - &self, - state: &mut PrefilterState, - haystack: &[u8], - at: usize, - ) -> Candidate { - memchr3(self.byte1, self.byte2, self.byte3, &haystack[at..]) - .map(|i| { - let pos = at + i; - state.update_at(pos); - let offset = self.offsets.set[haystack[pos] as usize].max; - cmp::max(at, pos.saturating_sub(offset as usize)) - }) - .map_or(Candidate::None, Candidate::PossibleStartOfMatch) - } - - fn clone_prefilter(&self) -> Box { - Box::new(self.clone()) - } - - fn heap_bytes(&self) -> usize { - 0 - } - - fn looks_for_non_start_of_match(&self) -> bool { - // TODO: See Prefilter impl for RareBytesOne. - true - } -} - -/// A builder for constructing a starting byte prefilter. -/// -/// A starting byte prefilter is a simplistic prefilter that looks for possible -/// matches by reporting all positions corresponding to a particular byte. This -/// generally only takes affect when there are at most 3 distinct possible -/// starting bytes. e.g., the patterns `foo`, `bar`, and `baz` have two -/// distinct starting bytes (`f` and `b`), and this prefilter returns all -/// occurrences of either `f` or `b`. -/// -/// In some cases, a heuristic frequency analysis may determine that it would -/// be better not to use this prefilter even when there are 3 or fewer distinct -/// starting bytes. -#[derive(Clone, Debug)] -struct StartBytesBuilder { - /// Whether this prefilter should account for ASCII case insensitivity or - /// not. - ascii_case_insensitive: bool, - /// The set of starting bytes observed. - byteset: Vec, - /// The number of bytes set to true in `byteset`. - count: usize, - /// The sum of frequency ranks for the rare bytes detected. This is - /// intended to give a heuristic notion of how rare the bytes are. 
- rank_sum: u16, -} - -impl StartBytesBuilder { - /// Create a new builder for constructing a start byte prefilter. - fn new() -> StartBytesBuilder { - StartBytesBuilder { - ascii_case_insensitive: false, - byteset: vec![false; 256], - count: 0, - rank_sum: 0, - } - } - - /// Enable ASCII case insensitivity. When set, byte strings added to this - /// builder will be interpreted without respect to ASCII case. - fn ascii_case_insensitive(mut self, yes: bool) -> StartBytesBuilder { - self.ascii_case_insensitive = yes; - self - } - - /// Build the starting bytes prefilter. - /// - /// If there are more than 3 distinct starting bytes, or if heuristics - /// otherwise determine that this prefilter should not be used, then `None` - /// is returned. - fn build(&self) -> Option { - if self.count > 3 { - return None; - } - let (mut bytes, mut len) = ([0; 3], 0); - for b in 0..256 { - if !self.byteset[b] { - continue; - } - // We don't handle non-ASCII bytes for now. Getting non-ASCII - // bytes right is trickier, since we generally don't want to put - // a leading UTF-8 code unit into a prefilter that isn't ASCII, - // since they can frequently. Instead, it would be better to use a - // continuation byte, but this requires more sophisticated analysis - // of the automaton and a richer prefilter API. - if b > 0x7F { - return None; - } - bytes[len] = b as u8; - len += 1; - } - match len { - 0 => None, - 1 => Some(PrefilterObj::new(StartBytesOne { byte1: bytes[0] })), - 2 => Some(PrefilterObj::new(StartBytesTwo { - byte1: bytes[0], - byte2: bytes[1], - })), - 3 => Some(PrefilterObj::new(StartBytesThree { - byte1: bytes[0], - byte2: bytes[1], - byte3: bytes[2], - })), - _ => unreachable!(), - } - } - - /// Add a byte string to this builder. - /// - /// All patterns added to an Aho-Corasick automaton should be added to this - /// builder before attempting to construct the prefilter. - fn add(&mut self, bytes: &[u8]) { - if self.count > 3 { - return; - } - if let Some(&byte) = bytes.get(0) { - self.add_one_byte(byte); - if self.ascii_case_insensitive { - self.add_one_byte(opposite_ascii_case(byte)); - } - } - } - - fn add_one_byte(&mut self, byte: u8) { - if !self.byteset[byte as usize] { - self.byteset[byte as usize] = true; - self.count += 1; - self.rank_sum += freq_rank(byte) as u16; - } - } -} - -/// A prefilter for scanning for a single starting byte. -#[derive(Clone, Debug)] -struct StartBytesOne { - byte1: u8, -} - -impl Prefilter for StartBytesOne { - fn next_candidate( - &self, - _state: &mut PrefilterState, - haystack: &[u8], - at: usize, - ) -> Candidate { - memchr(self.byte1, &haystack[at..]) - .map(|i| at + i) - .map_or(Candidate::None, Candidate::PossibleStartOfMatch) - } - - fn clone_prefilter(&self) -> Box { - Box::new(self.clone()) - } - - fn heap_bytes(&self) -> usize { - 0 - } -} - -/// A prefilter for scanning for two starting bytes. -#[derive(Clone, Debug)] -struct StartBytesTwo { - byte1: u8, - byte2: u8, -} - -impl Prefilter for StartBytesTwo { - fn next_candidate( - &self, - _state: &mut PrefilterState, - haystack: &[u8], - at: usize, - ) -> Candidate { - memchr2(self.byte1, self.byte2, &haystack[at..]) - .map(|i| at + i) - .map_or(Candidate::None, Candidate::PossibleStartOfMatch) - } - - fn clone_prefilter(&self) -> Box { - Box::new(self.clone()) - } - - fn heap_bytes(&self) -> usize { - 0 - } -} - -/// A prefilter for scanning for three starting bytes. 
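A sketch of the start-byte selection above, driving the module-private builder directly for illustration: `foo`, `bar` and `baz` contribute only two distinct ASCII starting bytes, so a prefilter is built (conceptually a `memchr2` scan for `f` and `b`).

let mut b = StartBytesBuilder::new();
b.add(b"foo");
b.add(b"bar");
b.add(b"baz"); // 'b' is already in the set, count stays at 2
assert_eq!(2, b.count);
assert!(b.build().is_some()); // two bytes <= 3, all ASCII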
-/// A prefilter for scanning for three starting bytes. -#[derive(Clone, Debug)] -struct StartBytesThree { - byte1: u8, - byte2: u8, - byte3: u8, -} - -impl Prefilter for StartBytesThree { - fn next_candidate( - &self, - _state: &mut PrefilterState, - haystack: &[u8], - at: usize, - ) -> Candidate { - memchr3(self.byte1, self.byte2, self.byte3, &haystack[at..]) - .map(|i| at + i) - .map_or(Candidate::None, Candidate::PossibleStartOfMatch) - } - - fn clone_prefilter(&self) -> Box<dyn Prefilter> { - Box::new(self.clone()) - } - - fn heap_bytes(&self) -> usize { - 0 - } -} - -/// Return the next candidate reported by the given prefilter while -/// simultaneously updating the given prestate. -/// -/// The caller is responsible for checking the prestate before deciding whether -/// to initiate a search. -#[inline] -pub fn next<P: Prefilter>( - prestate: &mut PrefilterState, - prefilter: P, - haystack: &[u8], - at: usize, -) -> Candidate { - let cand = prefilter.next_candidate(prestate, haystack, at); - match cand { - Candidate::None => { - prestate.update_skipped_bytes(haystack.len() - at); - } - Candidate::Match(ref m) => { - prestate.update_skipped_bytes(m.start() - at); - } - Candidate::PossibleStartOfMatch(i) => { - prestate.update_skipped_bytes(i - at); - } - } - cand -} - -/// If the given byte is an ASCII letter, then return it in the opposite case. -/// e.g., Given `b'A'`, this returns `b'a'`, and given `b'a'`, this returns -/// `b'A'`. If the byte is not an ASCII letter, then it is returned unchanged. -pub fn opposite_ascii_case(b: u8) -> u8 { - if b'A' <= b && b <= b'Z' { - b.to_ascii_lowercase() - } else if b'a' <= b && b <= b'z' { - b.to_ascii_uppercase() - } else { - b - } -} - -/// Return the frequency rank of the given byte. The higher the rank, the more -/// common the byte (heuristically speaking). -fn freq_rank(b: u8) -> u8 { - use crate::byte_frequencies::BYTE_FREQUENCIES; - BYTE_FREQUENCIES[b as usize] -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn scratch() { - let mut b = Builder::new(MatchKind::LeftmostFirst); - b.add(b"Sherlock"); - b.add(b"locjaw"); - // b.add(b"Sherlock"); - // b.add(b"Holmes"); - // b.add(b"Watson"); - // b.add("Шерлок Холмс".as_bytes()); - // b.add("Джон Уотсон".as_bytes()); - - let s = b.build().unwrap(); - println!("{:?}", s); - } -}
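Before moving on to state_id.rs, a quick standalone check of the opposite_ascii_case contract that the case-insensitive paths above rely on (a re-implementation with the same behavior, shown only for illustration):

// Same contract as the helper above: ASCII letters flip case, all other
// bytes pass through unchanged.
fn opposite_ascii_case(b: u8) -> u8 {
    if b.is_ascii_uppercase() {
        b.to_ascii_lowercase()
    } else if b.is_ascii_lowercase() {
        b.to_ascii_uppercase()
    } else {
        b
    }
}

fn main() {
    assert_eq!(opposite_ascii_case(b'A'), b'a');
    assert_eq!(opposite_ascii_case(b'a'), b'A');
    assert_eq!(opposite_ascii_case(b'7'), b'7'); // non-letter: unchanged
    assert_eq!(opposite_ascii_case(0xC3), 0xC3); // non-ASCII: unchanged
}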
diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/state_id.rs libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/state_id.rs --- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/state_id.rs 2022-11-23 18:55:06.000000000 +0000 +++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/state_id.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,192 +0,0 @@ -use std::fmt::Debug; -use std::hash::Hash; - -use crate::error::{Error, Result}; - -// NOTE: Most of this code was copied from regex-automata, but without the -// (de)serialization specific stuff. - -/// Check that the premultiplication of the given state identifier can -/// fit into the representation indicated by `S`. If it cannot, or if it -/// overflows `usize` itself, then an error is returned. -pub fn premultiply_overflow_error<S: StateID>( - last_state: S, - alphabet_len: usize, -) -> Result<()> { - let requested = match last_state.to_usize().checked_mul(alphabet_len) { - Some(requested) => requested, - None => return Err(Error::premultiply_overflow(0, 0)), - }; - if requested > S::max_id() { - return Err(Error::premultiply_overflow(S::max_id(), requested)); - } - Ok(()) -} - -/// Convert the given `usize` to the chosen state identifier -/// representation. If the given value cannot fit in the chosen -/// representation, then an error is returned. -pub fn usize_to_state_id<S: StateID>(value: usize) -> Result<S> { - if value > S::max_id() { - Err(Error::state_id_overflow(S::max_id())) - } else { - Ok(S::from_usize(value)) - } -} - -/// Return the unique identifier for an automaton's fail state in the chosen -/// representation indicated by `S`. -pub fn fail_id<S: StateID>() -> S { - S::from_usize(0) -} - -/// Return the unique identifier for an automaton's dead state in the chosen -/// representation indicated by `S`. -pub fn dead_id<S: StateID>() -> S { - S::from_usize(1) -} - -mod private { - /// Sealed stops crates other than aho-corasick from implementing any - /// traits that use it. - pub trait Sealed {} - impl Sealed for u8 {} - impl Sealed for u16 {} - impl Sealed for u32 {} - impl Sealed for u64 {} - impl Sealed for usize {} -} - -/// A trait describing the representation of an automaton's state identifier. -/// -/// The purpose of this trait is to safely express both the possible state -/// identifier representations that can be used in an automaton and to convert -/// between state identifier representations and types that can be used to -/// efficiently index memory (such as `usize`). -/// -/// In general, one should not need to implement this trait explicitly. Indeed, -/// for now, this trait is sealed such that it cannot be implemented by any -/// other type. In particular, this crate provides implementations for `u8`, -/// `u16`, `u32`, `u64` and `usize`. (`u32` and `u64` are only provided for -/// targets that can represent all corresponding values in a `usize`.) -pub trait StateID: - private::Sealed - + Clone - + Copy - + Debug - + Eq - + Hash - + PartialEq - + PartialOrd - + Ord -{ - /// Convert from a `usize` to this implementation's representation. - /// - /// Implementors may assume that `n <= Self::max_id`. That is, implementors - /// do not need to check whether `n` can fit inside this implementation's - /// representation. - fn from_usize(n: usize) -> Self; - - /// Convert this implementation's representation to a `usize`. - /// - /// Implementors must not return a `usize` value greater than - /// `Self::max_id` and must not permit overflow when converting between the - /// implementor's representation and `usize`. In general, the preferred - /// way for implementors to achieve this is to simply not provide - /// implementations of `StateID` that cannot fit into the target platform's - /// `usize`. - fn to_usize(self) -> usize; - - /// Return the maximum state identifier supported by this representation. - /// - /// Implementors must return a correct bound. Doing otherwise may result - /// in unspecified behavior (but will not violate memory safety).
- fn max_id() -> usize; -} - -impl StateID for usize { - #[inline] - fn from_usize(n: usize) -> usize { - n - } - - #[inline] - fn to_usize(self) -> usize { - self - } - - #[inline] - fn max_id() -> usize { - ::std::usize::MAX - } -} - -impl StateID for u8 { - #[inline] - fn from_usize(n: usize) -> u8 { - n as u8 - } - - #[inline] - fn to_usize(self) -> usize { - self as usize - } - - #[inline] - fn max_id() -> usize { - ::std::u8::MAX as usize - } -} - -impl StateID for u16 { - #[inline] - fn from_usize(n: usize) -> u16 { - n as u16 - } - - #[inline] - fn to_usize(self) -> usize { - self as usize - } - - #[inline] - fn max_id() -> usize { - ::std::u16::MAX as usize - } -} - -#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] -impl StateID for u32 { - #[inline] - fn from_usize(n: usize) -> u32 { - n as u32 - } - - #[inline] - fn to_usize(self) -> usize { - self as usize - } - - #[inline] - fn max_id() -> usize { - ::std::u32::MAX as usize - } -} - -#[cfg(target_pointer_width = "64")] -impl StateID for u64 { - #[inline] - fn from_usize(n: usize) -> u64 { - n as u64 - } - - #[inline] - fn to_usize(self) -> usize { - self as usize - } - - #[inline] - fn max_id() -> usize { - ::std::u64::MAX as usize - } -} diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/tests.rs libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/tests.rs --- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/aho-corasick/src/tests.rs 2022-11-23 18:55:06.000000000 +0000 +++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/aho-corasick/src/tests.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,1254 +0,0 @@ -use std::collections::HashMap; -use std::io; -use std::usize; - -use crate::{AhoCorasickBuilder, Match, MatchKind}; - -/// A description of a single test against an Aho-Corasick automaton. -/// -/// A single test may not necessarily pass on every configuration of an -/// Aho-Corasick automaton. The tests are categorized and grouped appropriately -/// below. -#[derive(Clone, Debug, Eq, PartialEq)] -struct SearchTest { - /// The name of this test, for debugging. - name: &'static str, - /// The patterns to search for. - patterns: &'static [&'static str], - /// The text to search. - haystack: &'static str, - /// Each match is a triple of (pattern_index, start, end), where - /// pattern_index is an index into `patterns` and `start`/`end` are indices - /// into `haystack`. - matches: &'static [(usize, usize, usize)], -} - -/// Short-hand constructor for SearchTest. We use it a lot below. -macro_rules! t { - ($name:ident, $patterns:expr, $haystack:expr, $matches:expr) => { - SearchTest { - name: stringify!($name), - patterns: $patterns, - haystack: $haystack, - matches: $matches, - } - }; -} - -/// A collection of test groups. -type TestCollection = &'static [&'static [SearchTest]]; - -// Define several collections corresponding to the different type of match -// semantics supported by Aho-Corasick. These collections have some overlap, -// but each collection should have some tests that no other collection has. - -/// Tests for Aho-Corasick's standard non-overlapping match semantics. -const AC_STANDARD_NON_OVERLAPPING: TestCollection = - &[BASICS, NON_OVERLAPPING, STANDARD, REGRESSION]; - -/// Tests for Aho-Corasick's anchored standard non-overlapping match semantics. -const AC_STANDARD_ANCHORED_NON_OVERLAPPING: TestCollection = - &[ANCHORED_BASICS, ANCHORED_NON_OVERLAPPING, STANDARD_ANCHORED]; - -/// Tests for Aho-Corasick's standard overlapping match semantics. 
-const AC_STANDARD_OVERLAPPING: TestCollection = - &[BASICS, OVERLAPPING, REGRESSION]; - -/// Tests for Aho-Corasick's anchored standard overlapping match semantics. -const AC_STANDARD_ANCHORED_OVERLAPPING: TestCollection = - &[ANCHORED_BASICS, ANCHORED_OVERLAPPING]; - -/// Tests for Aho-Corasick's leftmost-first match semantics. -const AC_LEFTMOST_FIRST: TestCollection = - &[BASICS, NON_OVERLAPPING, LEFTMOST, LEFTMOST_FIRST, REGRESSION]; - -/// Tests for Aho-Corasick's anchored leftmost-first match semantics. -const AC_LEFTMOST_FIRST_ANCHORED: TestCollection = &[ - ANCHORED_BASICS, - ANCHORED_NON_OVERLAPPING, - ANCHORED_LEFTMOST, - ANCHORED_LEFTMOST_FIRST, -]; - -/// Tests for Aho-Corasick's leftmost-longest match semantics. -const AC_LEFTMOST_LONGEST: TestCollection = - &[BASICS, NON_OVERLAPPING, LEFTMOST, LEFTMOST_LONGEST, REGRESSION]; - -/// Tests for Aho-Corasick's anchored leftmost-longest match semantics. -const AC_LEFTMOST_LONGEST_ANCHORED: TestCollection = &[ - ANCHORED_BASICS, - ANCHORED_NON_OVERLAPPING, - ANCHORED_LEFTMOST, - ANCHORED_LEFTMOST_LONGEST, -]; - -// Now define the individual tests that make up the collections above. - -/// A collection of tests for the Aho-Corasick algorithm that should always be -/// true regardless of match semantics. That is, all combinations of -/// leftmost-{shortest, first, longest} x {overlapping, non-overlapping} -/// should produce the same answer. -const BASICS: &'static [SearchTest] = &[ - t!(basic000, &[], "", &[]), - t!(basic001, &["a"], "", &[]), - t!(basic010, &["a"], "a", &[(0, 0, 1)]), - t!(basic020, &["a"], "aa", &[(0, 0, 1), (0, 1, 2)]), - t!(basic030, &["a"], "aaa", &[(0, 0, 1), (0, 1, 2), (0, 2, 3)]), - t!(basic040, &["a"], "aba", &[(0, 0, 1), (0, 2, 3)]), - t!(basic050, &["a"], "bba", &[(0, 2, 3)]), - t!(basic060, &["a"], "bbb", &[]), - t!(basic070, &["a"], "bababbbba", &[(0, 1, 2), (0, 3, 4), (0, 8, 9)]), - t!(basic100, &["aa"], "", &[]), - t!(basic110, &["aa"], "aa", &[(0, 0, 2)]), - t!(basic120, &["aa"], "aabbaa", &[(0, 0, 2), (0, 4, 6)]), - t!(basic130, &["aa"], "abbab", &[]), - t!(basic140, &["aa"], "abbabaa", &[(0, 5, 7)]), - t!(basic200, &["abc"], "abc", &[(0, 0, 3)]), - t!(basic210, &["abc"], "zazabzabcz", &[(0, 6, 9)]), - t!(basic220, &["abc"], "zazabczabcz", &[(0, 3, 6), (0, 7, 10)]), - t!(basic300, &["a", "b"], "", &[]), - t!(basic310, &["a", "b"], "z", &[]), - t!(basic320, &["a", "b"], "b", &[(1, 0, 1)]), - t!(basic330, &["a", "b"], "a", &[(0, 0, 1)]), - t!( - basic340, - &["a", "b"], - "abba", - &[(0, 0, 1), (1, 1, 2), (1, 2, 3), (0, 3, 4),] - ), - t!( - basic350, - &["b", "a"], - "abba", - &[(1, 0, 1), (0, 1, 2), (0, 2, 3), (1, 3, 4),] - ), - t!(basic360, &["abc", "bc"], "xbc", &[(1, 1, 3),]), - t!(basic400, &["foo", "bar"], "", &[]), - t!(basic410, &["foo", "bar"], "foobar", &[(0, 0, 3), (1, 3, 6),]), - t!(basic420, &["foo", "bar"], "barfoo", &[(1, 0, 3), (0, 3, 6),]), - t!(basic430, &["foo", "bar"], "foofoo", &[(0, 0, 3), (0, 3, 6),]), - t!(basic440, &["foo", "bar"], "barbar", &[(1, 0, 3), (1, 3, 6),]), - t!(basic450, &["foo", "bar"], "bafofoo", &[(0, 4, 7),]), - t!(basic460, &["bar", "foo"], "bafofoo", &[(1, 4, 7),]), - t!(basic470, &["foo", "bar"], "fobabar", &[(1, 4, 7),]), - t!(basic480, &["bar", "foo"], "fobabar", &[(0, 4, 7),]), - t!(basic600, &[""], "", &[(0, 0, 0)]), - t!(basic610, &[""], "a", &[(0, 0, 0), (0, 1, 1)]), - t!(basic620, &[""], "abc", &[(0, 0, 0), (0, 1, 1), (0, 2, 2), (0, 3, 3)]), - t!(basic700, &["yabcdef", "abcdezghi"], "yabcdefghi", &[(0, 0, 7),]), - t!(basic710, &["yabcdef", 
"abcdezghi"], "yabcdezghi", &[(1, 1, 10),]), - t!( - basic720, - &["yabcdef", "bcdeyabc", "abcdezghi"], - "yabcdezghi", - &[(2, 1, 10),] - ), -]; - -/// A collection of *anchored* tests for the Aho-Corasick algorithm that should -/// always be true regardless of match semantics. That is, all combinations of -/// leftmost-{shortest, first, longest} x {overlapping, non-overlapping} should -/// produce the same answer. -const ANCHORED_BASICS: &'static [SearchTest] = &[ - t!(abasic000, &[], "", &[]), - t!(abasic010, &[""], "", &[(0, 0, 0)]), - t!(abasic020, &[""], "a", &[(0, 0, 0)]), - t!(abasic030, &[""], "abc", &[(0, 0, 0)]), - t!(abasic100, &["a"], "a", &[(0, 0, 1)]), - t!(abasic110, &["a"], "aa", &[(0, 0, 1)]), - t!(abasic120, &["a", "b"], "ab", &[(0, 0, 1)]), - t!(abasic130, &["a", "b"], "ba", &[(1, 0, 1)]), - t!(abasic140, &["foo", "foofoo"], "foo", &[(0, 0, 3)]), - t!(abasic150, &["foofoo", "foo"], "foo", &[(1, 0, 3)]), -]; - -/// Tests for non-overlapping standard match semantics. -/// -/// These tests generally shouldn't pass for leftmost-{first,longest}, although -/// some do in order to write clearer tests. For example, standard000 will -/// pass with leftmost-first semantics, but standard010 will not. We write -/// both to emphasize how the match semantics work. -const STANDARD: &'static [SearchTest] = &[ - t!(standard000, &["ab", "abcd"], "abcd", &[(0, 0, 2)]), - t!(standard010, &["abcd", "ab"], "abcd", &[(1, 0, 2)]), - t!(standard020, &["abcd", "ab", "abc"], "abcd", &[(1, 0, 2)]), - t!(standard030, &["abcd", "abc", "ab"], "abcd", &[(2, 0, 2)]), - t!(standard040, &["a", ""], "a", &[(1, 0, 0), (1, 1, 1)]), - t!( - standard400, - &["abcd", "bcd", "cd", "b"], - "abcd", - &[(3, 1, 2), (2, 2, 4),] - ), - t!(standard410, &["", "a"], "a", &[(0, 0, 0), (0, 1, 1),]), - t!(standard420, &["", "a"], "aa", &[(0, 0, 0), (0, 1, 1), (0, 2, 2),]), - t!(standard430, &["", "a", ""], "a", &[(0, 0, 0), (0, 1, 1),]), - t!(standard440, &["a", "", ""], "a", &[(1, 0, 0), (1, 1, 1),]), - t!(standard450, &["", "", "a"], "a", &[(0, 0, 0), (0, 1, 1),]), -]; - -/// Like STANDARD, but for anchored searches. -const STANDARD_ANCHORED: &'static [SearchTest] = &[ - t!(astandard000, &["ab", "abcd"], "abcd", &[(0, 0, 2)]), - t!(astandard010, &["abcd", "ab"], "abcd", &[(1, 0, 2)]), - t!(astandard020, &["abcd", "ab", "abc"], "abcd", &[(1, 0, 2)]), - t!(astandard030, &["abcd", "abc", "ab"], "abcd", &[(2, 0, 2)]), - t!(astandard040, &["a", ""], "a", &[(1, 0, 0)]), - t!(astandard050, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4)]), - t!(astandard410, &["", "a"], "a", &[(0, 0, 0)]), - t!(astandard420, &["", "a"], "aa", &[(0, 0, 0)]), - t!(astandard430, &["", "a", ""], "a", &[(0, 0, 0)]), - t!(astandard440, &["a", "", ""], "a", &[(1, 0, 0)]), - t!(astandard450, &["", "", "a"], "a", &[(0, 0, 0)]), -]; - -/// Tests for non-overlapping leftmost match semantics. These should pass for -/// both leftmost-first and leftmost-longest match kinds. Stated differently, -/// among ambiguous matches, the longest match and the match that appeared -/// first when constructing the automaton should always be the same. 
-const LEFTMOST: &'static [SearchTest] = &[ - t!(leftmost000, &["ab", "ab"], "abcd", &[(0, 0, 2)]), - t!(leftmost010, &["a", ""], "a", &[(0, 0, 1), (1, 1, 1)]), - t!(leftmost020, &["", ""], "a", &[(0, 0, 0), (0, 1, 1)]), - t!(leftmost030, &["a", "ab"], "aa", &[(0, 0, 1), (0, 1, 2)]), - t!(leftmost031, &["ab", "a"], "aa", &[(1, 0, 1), (1, 1, 2)]), - t!(leftmost032, &["ab", "a"], "xayabbbz", &[(1, 1, 2), (0, 3, 5)]), - t!(leftmost300, &["abcd", "bce", "b"], "abce", &[(1, 1, 4)]), - t!(leftmost310, &["abcd", "ce", "bc"], "abce", &[(2, 1, 3)]), - t!(leftmost320, &["abcd", "bce", "ce", "b"], "abce", &[(1, 1, 4)]), - t!(leftmost330, &["abcd", "bce", "cz", "bc"], "abcz", &[(3, 1, 3)]), - t!(leftmost340, &["bce", "cz", "bc"], "bcz", &[(2, 0, 2)]), - t!(leftmost350, &["abc", "bd", "ab"], "abd", &[(2, 0, 2)]), - t!( - leftmost360, - &["abcdefghi", "hz", "abcdefgh"], - "abcdefghz", - &[(2, 0, 8),] - ), - t!( - leftmost370, - &["abcdefghi", "cde", "hz", "abcdefgh"], - "abcdefghz", - &[(3, 0, 8),] - ), - t!( - leftmost380, - &["abcdefghi", "hz", "abcdefgh", "a"], - "abcdefghz", - &[(2, 0, 8),] - ), - t!( - leftmost390, - &["b", "abcdefghi", "hz", "abcdefgh"], - "abcdefghz", - &[(3, 0, 8),] - ), - t!( - leftmost400, - &["h", "abcdefghi", "hz", "abcdefgh"], - "abcdefghz", - &[(3, 0, 8),] - ), - t!( - leftmost410, - &["z", "abcdefghi", "hz", "abcdefgh"], - "abcdefghz", - &[(3, 0, 8), (0, 8, 9),] - ), -]; - -/// Like LEFTMOST, but for anchored searches. -const ANCHORED_LEFTMOST: &'static [SearchTest] = &[ - t!(aleftmost000, &["ab", "ab"], "abcd", &[(0, 0, 2)]), - t!(aleftmost010, &["a", ""], "a", &[(0, 0, 1)]), - t!(aleftmost020, &["", ""], "a", &[(0, 0, 0)]), - t!(aleftmost030, &["a", "ab"], "aa", &[(0, 0, 1)]), - t!(aleftmost031, &["ab", "a"], "aa", &[(1, 0, 1)]), - t!(aleftmost032, &["ab", "a"], "xayabbbz", &[]), - t!(aleftmost300, &["abcd", "bce", "b"], "abce", &[]), - t!(aleftmost310, &["abcd", "ce", "bc"], "abce", &[]), - t!(aleftmost320, &["abcd", "bce", "ce", "b"], "abce", &[]), - t!(aleftmost330, &["abcd", "bce", "cz", "bc"], "abcz", &[]), - t!(aleftmost340, &["bce", "cz", "bc"], "bcz", &[(2, 0, 2)]), - t!(aleftmost350, &["abc", "bd", "ab"], "abd", &[(2, 0, 2)]), - t!( - aleftmost360, - &["abcdefghi", "hz", "abcdefgh"], - "abcdefghz", - &[(2, 0, 8),] - ), - t!( - aleftmost370, - &["abcdefghi", "cde", "hz", "abcdefgh"], - "abcdefghz", - &[(3, 0, 8),] - ), - t!( - aleftmost380, - &["abcdefghi", "hz", "abcdefgh", "a"], - "abcdefghz", - &[(2, 0, 8),] - ), - t!( - aleftmost390, - &["b", "abcdefghi", "hz", "abcdefgh"], - "abcdefghz", - &[(3, 0, 8),] - ), - t!( - aleftmost400, - &["h", "abcdefghi", "hz", "abcdefgh"], - "abcdefghz", - &[(3, 0, 8),] - ), - t!( - aleftmost410, - &["z", "abcdefghi", "hz", "abcdefgh"], - "abcdefghz", - &[(3, 0, 8)] - ), -]; - -/// Tests for non-overlapping leftmost-first match semantics. These tests -/// should generally be specific to leftmost-first, which means they should -/// generally fail under leftmost-longest semantics. 
-const LEFTMOST_FIRST: &'static [SearchTest] = &[ - t!(leftfirst000, &["ab", "abcd"], "abcd", &[(0, 0, 2)]), - t!(leftfirst010, &["", "a"], "a", &[(0, 0, 0), (0, 1, 1)]), - t!(leftfirst011, &["", "a", ""], "a", &[(0, 0, 0), (0, 1, 1),]), - t!(leftfirst012, &["a", "", ""], "a", &[(0, 0, 1), (1, 1, 1),]), - t!(leftfirst013, &["", "", "a"], "a", &[(0, 0, 0), (0, 1, 1),]), - t!(leftfirst020, &["abcd", "ab"], "abcd", &[(0, 0, 4)]), - t!(leftfirst030, &["ab", "ab"], "abcd", &[(0, 0, 2)]), - t!(leftfirst040, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (0, 3, 4)]), - t!(leftfirst100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(1, 1, 5)]), - t!(leftfirst110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]), - t!(leftfirst300, &["abcd", "b", "bce"], "abce", &[(1, 1, 2)]), - t!( - leftfirst310, - &["abcd", "b", "bce", "ce"], - "abce", - &[(1, 1, 2), (3, 2, 4),] - ), - t!( - leftfirst320, - &["a", "abcdefghi", "hz", "abcdefgh"], - "abcdefghz", - &[(0, 0, 1), (2, 7, 9),] - ), - t!(leftfirst330, &["a", "abab"], "abab", &[(0, 0, 1), (0, 2, 3)]), - t!(leftfirst400, &["amwix", "samwise", "sam"], "Zsamwix", &[(2, 1, 4)]), -]; - -/// Like LEFTMOST_FIRST, but for anchored searches. -const ANCHORED_LEFTMOST_FIRST: &'static [SearchTest] = &[ - t!(aleftfirst000, &["ab", "abcd"], "abcd", &[(0, 0, 2)]), - t!(aleftfirst010, &["", "a"], "a", &[(0, 0, 0)]), - t!(aleftfirst011, &["", "a", ""], "a", &[(0, 0, 0)]), - t!(aleftfirst012, &["a", "", ""], "a", &[(0, 0, 1)]), - t!(aleftfirst013, &["", "", "a"], "a", &[(0, 0, 0)]), - t!(aleftfirst020, &["abcd", "ab"], "abcd", &[(0, 0, 4)]), - t!(aleftfirst030, &["ab", "ab"], "abcd", &[(0, 0, 2)]), - t!(aleftfirst040, &["a", "ab"], "xayabbbz", &[]), - t!(aleftfirst100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[]), - t!(aleftfirst110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[]), - t!(aleftfirst300, &["abcd", "b", "bce"], "abce", &[]), - t!(aleftfirst310, &["abcd", "b", "bce", "ce"], "abce", &[]), - t!( - aleftfirst320, - &["a", "abcdefghi", "hz", "abcdefgh"], - "abcdefghz", - &[(0, 0, 1)] - ), - t!(aleftfirst330, &["a", "abab"], "abab", &[(0, 0, 1)]), - t!(aleftfirst400, &["wise", "samwise", "sam"], "samwix", &[(2, 0, 3)]), -]; - -/// Tests for non-overlapping leftmost-longest match semantics. These tests -/// should generally be specific to leftmost-longest, which means they should -/// generally fail under leftmost-first semantics. 
-const LEFTMOST_LONGEST: &'static [SearchTest] = &[ - t!(leftlong000, &["ab", "abcd"], "abcd", &[(1, 0, 4)]), - t!(leftlong010, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4),]), - t!(leftlong020, &["", "a"], "a", &[(1, 0, 1), (0, 1, 1),]), - t!(leftlong021, &["", "a", ""], "a", &[(1, 0, 1), (0, 1, 1),]), - t!(leftlong022, &["a", "", ""], "a", &[(0, 0, 1), (1, 1, 1),]), - t!(leftlong023, &["", "", "a"], "a", &[(2, 0, 1), (0, 1, 1),]), - t!(leftlong030, &["", "a"], "aa", &[(1, 0, 1), (1, 1, 2), (0, 2, 2),]), - t!(leftlong040, &["a", "ab"], "a", &[(0, 0, 1)]), - t!(leftlong050, &["a", "ab"], "ab", &[(1, 0, 2)]), - t!(leftlong060, &["ab", "a"], "a", &[(1, 0, 1)]), - t!(leftlong070, &["ab", "a"], "ab", &[(0, 0, 2)]), - t!(leftlong100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(2, 1, 6)]), - t!(leftlong110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]), - t!(leftlong300, &["abcd", "b", "bce"], "abce", &[(2, 1, 4)]), - t!( - leftlong310, - &["a", "abcdefghi", "hz", "abcdefgh"], - "abcdefghz", - &[(3, 0, 8),] - ), - t!(leftlong320, &["a", "abab"], "abab", &[(1, 0, 4)]), - t!(leftlong330, &["abcd", "b", "ce"], "abce", &[(1, 1, 2), (2, 2, 4),]), - t!(leftlong340, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (1, 3, 5)]), -]; - -/// Like LEFTMOST_LONGEST, but for anchored searches. -const ANCHORED_LEFTMOST_LONGEST: &'static [SearchTest] = &[ - t!(aleftlong000, &["ab", "abcd"], "abcd", &[(1, 0, 4)]), - t!(aleftlong010, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4),]), - t!(aleftlong020, &["", "a"], "a", &[(1, 0, 1)]), - t!(aleftlong021, &["", "a", ""], "a", &[(1, 0, 1)]), - t!(aleftlong022, &["a", "", ""], "a", &[(0, 0, 1)]), - t!(aleftlong023, &["", "", "a"], "a", &[(2, 0, 1)]), - t!(aleftlong030, &["", "a"], "aa", &[(1, 0, 1)]), - t!(aleftlong040, &["a", "ab"], "a", &[(0, 0, 1)]), - t!(aleftlong050, &["a", "ab"], "ab", &[(1, 0, 2)]), - t!(aleftlong060, &["ab", "a"], "a", &[(1, 0, 1)]), - t!(aleftlong070, &["ab", "a"], "ab", &[(0, 0, 2)]), - t!(aleftlong100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[]), - t!(aleftlong110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[]), - t!(aleftlong300, &["abcd", "b", "bce"], "abce", &[]), - t!( - aleftlong310, - &["a", "abcdefghi", "hz", "abcdefgh"], - "abcdefghz", - &[(3, 0, 8),] - ), - t!(aleftlong320, &["a", "abab"], "abab", &[(1, 0, 4)]), - t!(aleftlong330, &["abcd", "b", "ce"], "abce", &[]), - t!(aleftlong340, &["a", "ab"], "xayabbbz", &[]), -]; - -/// Tests for non-overlapping match semantics. -/// -/// Generally these tests shouldn't pass when using overlapping semantics. -/// These should pass for both standard and leftmost match semantics. -const NON_OVERLAPPING: &'static [SearchTest] = &[ - t!(nover010, &["abcd", "bcd", "cd"], "abcd", &[(0, 0, 4),]), - t!(nover020, &["bcd", "cd", "abcd"], "abcd", &[(2, 0, 4),]), - t!(nover030, &["abc", "bc"], "zazabcz", &[(0, 3, 6),]), - t!( - nover100, - &["ab", "ba"], - "abababa", - &[(0, 0, 2), (0, 2, 4), (0, 4, 6),] - ), - t!(nover200, &["foo", "foo"], "foobarfoo", &[(0, 0, 3), (0, 6, 9),]), - t!(nover300, &["", ""], "", &[(0, 0, 0),]), - t!(nover310, &["", ""], "a", &[(0, 0, 0), (0, 1, 1),]), -]; - -/// Like NON_OVERLAPPING, but for anchored searches. 
-const ANCHORED_NON_OVERLAPPING: &'static [SearchTest] = &[ - t!(anover010, &["abcd", "bcd", "cd"], "abcd", &[(0, 0, 4),]), - t!(anover020, &["bcd", "cd", "abcd"], "abcd", &[(2, 0, 4),]), - t!(anover030, &["abc", "bc"], "zazabcz", &[]), - t!(anover100, &["ab", "ba"], "abababa", &[(0, 0, 2)]), - t!(anover200, &["foo", "foo"], "foobarfoo", &[(0, 0, 3)]), - t!(anover300, &["", ""], "", &[(0, 0, 0),]), - t!(anover310, &["", ""], "a", &[(0, 0, 0)]), -]; - -/// Tests for overlapping match semantics. -/// -/// This only supports standard match semantics, since leftmost-{first,longest} -/// do not support overlapping matches. -const OVERLAPPING: &'static [SearchTest] = &[ - t!( - over000, - &["abcd", "bcd", "cd", "b"], - "abcd", - &[(3, 1, 2), (0, 0, 4), (1, 1, 4), (2, 2, 4),] - ), - t!( - over010, - &["bcd", "cd", "b", "abcd"], - "abcd", - &[(2, 1, 2), (3, 0, 4), (0, 1, 4), (1, 2, 4),] - ), - t!( - over020, - &["abcd", "bcd", "cd"], - "abcd", - &[(0, 0, 4), (1, 1, 4), (2, 2, 4),] - ), - t!( - over030, - &["bcd", "abcd", "cd"], - "abcd", - &[(1, 0, 4), (0, 1, 4), (2, 2, 4),] - ), - t!( - over040, - &["bcd", "cd", "abcd"], - "abcd", - &[(2, 0, 4), (0, 1, 4), (1, 2, 4),] - ), - t!(over050, &["abc", "bc"], "zazabcz", &[(0, 3, 6), (1, 4, 6),]), - t!( - over100, - &["ab", "ba"], - "abababa", - &[(0, 0, 2), (1, 1, 3), (0, 2, 4), (1, 3, 5), (0, 4, 6), (1, 5, 7),] - ), - t!( - over200, - &["foo", "foo"], - "foobarfoo", - &[(0, 0, 3), (1, 0, 3), (0, 6, 9), (1, 6, 9),] - ), - t!(over300, &["", ""], "", &[(0, 0, 0), (1, 0, 0),]), - t!( - over310, - &["", ""], - "a", - &[(0, 0, 0), (1, 0, 0), (0, 1, 1), (1, 1, 1),] - ), - t!(over320, &["", "a"], "a", &[(0, 0, 0), (1, 0, 1), (0, 1, 1),]), - t!( - over330, - &["", "a", ""], - "a", - &[(0, 0, 0), (2, 0, 0), (1, 0, 1), (0, 1, 1), (2, 1, 1),] - ), - t!( - over340, - &["a", "", ""], - "a", - &[(1, 0, 0), (2, 0, 0), (0, 0, 1), (1, 1, 1), (2, 1, 1),] - ), - t!( - over350, - &["", "", "a"], - "a", - &[(0, 0, 0), (1, 0, 0), (2, 0, 1), (0, 1, 1), (1, 1, 1),] - ), - t!( - over360, - &["foo", "foofoo"], - "foofoo", - &[(0, 0, 3), (1, 0, 6), (0, 3, 6)] - ), -]; - -/// Like OVERLAPPING, but for anchored searches. -const ANCHORED_OVERLAPPING: &'static [SearchTest] = &[ - t!(aover000, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4)]), - t!(aover010, &["bcd", "cd", "b", "abcd"], "abcd", &[(3, 0, 4)]), - t!(aover020, &["abcd", "bcd", "cd"], "abcd", &[(0, 0, 4)]), - t!(aover030, &["bcd", "abcd", "cd"], "abcd", &[(1, 0, 4)]), - t!(aover040, &["bcd", "cd", "abcd"], "abcd", &[(2, 0, 4)]), - t!(aover050, &["abc", "bc"], "zazabcz", &[]), - t!(aover100, &["ab", "ba"], "abababa", &[(0, 0, 2)]), - t!(aover200, &["foo", "foo"], "foobarfoo", &[(0, 0, 3), (1, 0, 3)]), - t!(aover300, &["", ""], "", &[(0, 0, 0), (1, 0, 0),]), - t!(aover310, &["", ""], "a", &[(0, 0, 0), (1, 0, 0)]), - t!(aover320, &["", "a"], "a", &[(0, 0, 0), (1, 0, 1)]), - t!(aover330, &["", "a", ""], "a", &[(0, 0, 0), (2, 0, 0), (1, 0, 1)]), - t!(aover340, &["a", "", ""], "a", &[(1, 0, 0), (2, 0, 0), (0, 0, 1)]), - t!(aover350, &["", "", "a"], "a", &[(0, 0, 0), (1, 0, 0), (2, 0, 1)]), - t!(aover360, &["foo", "foofoo"], "foofoo", &[(0, 0, 3), (1, 0, 6)]), -]; - -/// Tests for ASCII case insensitivity. -/// -/// These tests should all have the same behavior regardless of match semantics -/// or whether the search is overlapping. 
-const ASCII_CASE_INSENSITIVE: &'static [SearchTest] = &[ - t!(acasei000, &["a"], "A", &[(0, 0, 1)]), - t!(acasei010, &["Samwise"], "SAMWISE", &[(0, 0, 7)]), - t!(acasei011, &["Samwise"], "SAMWISE.abcd", &[(0, 0, 7)]), - t!(acasei020, &["fOoBaR"], "quux foobar baz", &[(0, 5, 11)]), -]; - -/// Like ASCII_CASE_INSENSITIVE, but specifically for non-overlapping tests. -const ASCII_CASE_INSENSITIVE_NON_OVERLAPPING: &'static [SearchTest] = &[ - t!(acasei000, &["foo", "FOO"], "fOo", &[(0, 0, 3)]), - t!(acasei000, &["FOO", "foo"], "fOo", &[(0, 0, 3)]), - t!(acasei010, &["abc", "def"], "abcdef", &[(0, 0, 3), (1, 3, 6)]), -]; - -/// Like ASCII_CASE_INSENSITIVE, but specifically for overlapping tests. -const ASCII_CASE_INSENSITIVE_OVERLAPPING: &'static [SearchTest] = &[ - t!(acasei000, &["foo", "FOO"], "fOo", &[(0, 0, 3), (1, 0, 3)]), - t!(acasei001, &["FOO", "foo"], "fOo", &[(0, 0, 3), (1, 0, 3)]), - // This is a regression test from: - // https://github.com/BurntSushi/aho-corasick/issues/68 - // Previously, it was reporting a duplicate (1, 3, 6) match. - t!( - acasei010, - &["abc", "def", "abcdef"], - "abcdef", - &[(0, 0, 3), (2, 0, 6), (1, 3, 6)] - ), -]; - -/// Regression tests that are applied to all Aho-Corasick combinations. -/// -/// If regression tests are needed for specific match semantics, then add them -/// to the appropriate group above. -const REGRESSION: &'static [SearchTest] = &[ - t!(regression010, &["inf", "ind"], "infind", &[(0, 0, 3), (1, 3, 6),]), - t!(regression020, &["ind", "inf"], "infind", &[(1, 0, 3), (0, 3, 6),]), - t!( - regression030, - &["libcore/", "libstd/"], - "libcore/char/methods.rs", - &[(0, 0, 8),] - ), - t!( - regression040, - &["libstd/", "libcore/"], - "libcore/char/methods.rs", - &[(1, 0, 8),] - ), - t!( - regression050, - &["\x00\x00\x01", "\x00\x00\x00"], - "\x00\x00\x00", - &[(1, 0, 3),] - ), - t!( - regression060, - &["\x00\x00\x00", "\x00\x00\x01"], - "\x00\x00\x00", - &[(0, 0, 3),] - ), -]; - -// Now define a test for each combination of things above that we want to run. -// Since there are a few different combinations for each collection of tests, -// we define a couple of macros to avoid repetition drudgery. The testconfig -// macro constructs the automaton from a given match kind, and runs the search -// tests one-by-one over the given collection. The `with` parameter allows one -// to configure the builder with additional parameters. The testcombo macro -// invokes testconfig in precisely this way: it sets up several tests where -// each one turns a different knob on AhoCorasickBuilder. - -macro_rules! 
testconfig { - (overlapping, $name:ident, $collection:expr, $kind:ident, $with:expr) => { - #[test] - fn $name() { - run_search_tests($collection, |test| { - let mut builder = AhoCorasickBuilder::new(); - $with(&mut builder); - builder - .match_kind(MatchKind::$kind) - .build(test.patterns) - .find_overlapping_iter(test.haystack) - .collect() - }); - } - }; - (stream, $name:ident, $collection:expr, $kind:ident, $with:expr) => { - #[test] - fn $name() { - run_search_tests($collection, |test| { - let buf = - io::BufReader::with_capacity(1, test.haystack.as_bytes()); - let mut builder = AhoCorasickBuilder::new(); - $with(&mut builder); - builder - .match_kind(MatchKind::$kind) - .build(test.patterns) - .stream_find_iter(buf) - .map(|result| result.unwrap()) - .collect() - }); - } - }; - ($name:ident, $collection:expr, $kind:ident, $with:expr) => { - #[test] - fn $name() { - run_search_tests($collection, |test| { - let mut builder = AhoCorasickBuilder::new(); - $with(&mut builder); - builder - .match_kind(MatchKind::$kind) - .build(test.patterns) - .find_iter(test.haystack) - .collect() - }); - } - }; -} - -macro_rules! testcombo { - ($name:ident, $collection:expr, $kind:ident) => { - mod $name { - use super::*; - - testconfig!(nfa_default, $collection, $kind, |_| ()); - testconfig!( - nfa_no_prefilter, - $collection, - $kind, - |b: &mut AhoCorasickBuilder| { - b.prefilter(false); - } - ); - testconfig!( - nfa_all_sparse, - $collection, - $kind, - |b: &mut AhoCorasickBuilder| { - b.dense_depth(0); - } - ); - testconfig!( - nfa_all_dense, - $collection, - $kind, - |b: &mut AhoCorasickBuilder| { - b.dense_depth(usize::MAX); - } - ); - testconfig!( - dfa_default, - $collection, - $kind, - |b: &mut AhoCorasickBuilder| { - b.dfa(true); - } - ); - testconfig!( - dfa_no_prefilter, - $collection, - $kind, - |b: &mut AhoCorasickBuilder| { - b.dfa(true).prefilter(false); - } - ); - testconfig!( - dfa_all_sparse, - $collection, - $kind, - |b: &mut AhoCorasickBuilder| { - b.dfa(true).dense_depth(0); - } - ); - testconfig!( - dfa_all_dense, - $collection, - $kind, - |b: &mut AhoCorasickBuilder| { - b.dfa(true).dense_depth(usize::MAX); - } - ); - testconfig!( - dfa_no_byte_class, - $collection, - $kind, - |b: &mut AhoCorasickBuilder| { - // TODO: remove tests when option is removed. - #[allow(deprecated)] - b.dfa(true).byte_classes(false); - } - ); - testconfig!( - dfa_no_premultiply, - $collection, - $kind, - |b: &mut AhoCorasickBuilder| { - // TODO: remove tests when option is removed. - #[allow(deprecated)] - b.dfa(true).premultiply(false); - } - ); - testconfig!( - dfa_no_byte_class_no_premultiply, - $collection, - $kind, - |b: &mut AhoCorasickBuilder| { - // TODO: remove tests when options are removed. - #[allow(deprecated)] - b.dfa(true).byte_classes(false).premultiply(false); - } - ); - } - }; -} - -// Write out the combinations. -testcombo!(search_leftmost_longest, AC_LEFTMOST_LONGEST, LeftmostLongest); -testcombo!(search_leftmost_first, AC_LEFTMOST_FIRST, LeftmostFirst); -testcombo!( - search_standard_nonoverlapping, - AC_STANDARD_NON_OVERLAPPING, - Standard -); - -// Write out the overlapping combo by hand since there is only one of them. 
-testconfig!( - overlapping, - search_standard_overlapping_nfa_default, - AC_STANDARD_OVERLAPPING, - Standard, - |_| () -); -testconfig!( - overlapping, - search_standard_overlapping_nfa_all_sparse, - AC_STANDARD_OVERLAPPING, - Standard, - |b: &mut AhoCorasickBuilder| { - b.dense_depth(0); - } -); -testconfig!( - overlapping, - search_standard_overlapping_nfa_all_dense, - AC_STANDARD_OVERLAPPING, - Standard, - |b: &mut AhoCorasickBuilder| { - b.dense_depth(usize::MAX); - } -); -testconfig!( - overlapping, - search_standard_overlapping_dfa_default, - AC_STANDARD_OVERLAPPING, - Standard, - |b: &mut AhoCorasickBuilder| { - b.dfa(true); - } -); -testconfig!( - overlapping, - search_standard_overlapping_dfa_all_sparse, - AC_STANDARD_OVERLAPPING, - Standard, - |b: &mut AhoCorasickBuilder| { - b.dfa(true).dense_depth(0); - } -); -testconfig!( - overlapping, - search_standard_overlapping_dfa_all_dense, - AC_STANDARD_OVERLAPPING, - Standard, - |b: &mut AhoCorasickBuilder| { - b.dfa(true).dense_depth(usize::MAX); - } -); -testconfig!( - overlapping, - search_standard_overlapping_dfa_no_byte_class, - AC_STANDARD_OVERLAPPING, - Standard, - |b: &mut AhoCorasickBuilder| { - // TODO: remove tests when option is removed. - #[allow(deprecated)] - b.dfa(true).byte_classes(false); - } -); -testconfig!( - overlapping, - search_standard_overlapping_dfa_no_premultiply, - AC_STANDARD_OVERLAPPING, - Standard, - |b: &mut AhoCorasickBuilder| { - // TODO: remove tests when option is removed. - #[allow(deprecated)] - b.dfa(true).premultiply(false); - } -); -testconfig!( - overlapping, - search_standard_overlapping_dfa_no_byte_class_no_premultiply, - AC_STANDARD_OVERLAPPING, - Standard, - |b: &mut AhoCorasickBuilder| { - // TODO: remove tests when options are removed. - #[allow(deprecated)] - b.dfa(true).byte_classes(false).premultiply(false); - } -); - -// Also write out tests manually for streams, since we only test the standard -// match semantics. We also don't bother testing different automaton -// configurations, since those are well covered by tests above. -testconfig!( - stream, - search_standard_stream_nfa_default, - AC_STANDARD_NON_OVERLAPPING, - Standard, - |_| () -); -testconfig!( - stream, - search_standard_stream_dfa_default, - AC_STANDARD_NON_OVERLAPPING, - Standard, - |b: &mut AhoCorasickBuilder| { - b.dfa(true); - } -); - -// Same thing for anchored searches. Write them out manually. 
-testconfig!( - search_standard_anchored_nfa_default, - AC_STANDARD_ANCHORED_NON_OVERLAPPING, - Standard, - |b: &mut AhoCorasickBuilder| { - b.anchored(true); - } -); -testconfig!( - search_standard_anchored_dfa_default, - AC_STANDARD_ANCHORED_NON_OVERLAPPING, - Standard, - |b: &mut AhoCorasickBuilder| { - b.anchored(true).dfa(true); - } -); -testconfig!( - overlapping, - search_standard_anchored_overlapping_nfa_default, - AC_STANDARD_ANCHORED_OVERLAPPING, - Standard, - |b: &mut AhoCorasickBuilder| { - b.anchored(true); - } -); -testconfig!( - overlapping, - search_standard_anchored_overlapping_dfa_default, - AC_STANDARD_ANCHORED_OVERLAPPING, - Standard, - |b: &mut AhoCorasickBuilder| { - b.anchored(true).dfa(true); - } -); -testconfig!( - search_leftmost_first_anchored_nfa_default, - AC_LEFTMOST_FIRST_ANCHORED, - LeftmostFirst, - |b: &mut AhoCorasickBuilder| { - b.anchored(true); - } -); -testconfig!( - search_leftmost_first_anchored_dfa_default, - AC_LEFTMOST_FIRST_ANCHORED, - LeftmostFirst, - |b: &mut AhoCorasickBuilder| { - b.anchored(true).dfa(true); - } -); -testconfig!( - search_leftmost_longest_anchored_nfa_default, - AC_LEFTMOST_LONGEST_ANCHORED, - LeftmostLongest, - |b: &mut AhoCorasickBuilder| { - b.anchored(true); - } -); -testconfig!( - search_leftmost_longest_anchored_dfa_default, - AC_LEFTMOST_LONGEST_ANCHORED, - LeftmostLongest, - |b: &mut AhoCorasickBuilder| { - b.anchored(true).dfa(true); - } -); - -// And also write out the test combinations for ASCII case insensitivity. -testconfig!( - acasei_standard_nfa_default, - &[ASCII_CASE_INSENSITIVE], - Standard, - |b: &mut AhoCorasickBuilder| { - b.prefilter(false).ascii_case_insensitive(true); - } -); -testconfig!( - acasei_standard_dfa_default, - &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING], - Standard, - |b: &mut AhoCorasickBuilder| { - b.ascii_case_insensitive(true).dfa(true); - } -); -testconfig!( - overlapping, - acasei_standard_overlapping_nfa_default, - &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_OVERLAPPING], - Standard, - |b: &mut AhoCorasickBuilder| { - b.ascii_case_insensitive(true); - } -); -testconfig!( - overlapping, - acasei_standard_overlapping_dfa_default, - &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_OVERLAPPING], - Standard, - |b: &mut AhoCorasickBuilder| { - b.ascii_case_insensitive(true).dfa(true); - } -); -testconfig!( - acasei_leftmost_first_nfa_default, - &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING], - LeftmostFirst, - |b: &mut AhoCorasickBuilder| { - b.ascii_case_insensitive(true); - } -); -testconfig!( - acasei_leftmost_first_dfa_default, - &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING], - LeftmostFirst, - |b: &mut AhoCorasickBuilder| { - b.ascii_case_insensitive(true).dfa(true); - } -); -testconfig!( - acasei_leftmost_longest_nfa_default, - &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING], - LeftmostLongest, - |b: &mut AhoCorasickBuilder| { - b.ascii_case_insensitive(true); - } -); -testconfig!( - acasei_leftmost_longest_dfa_default, - &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING], - LeftmostLongest, - |b: &mut AhoCorasickBuilder| { - b.ascii_case_insensitive(true).dfa(true); - } -); -
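Each testconfig! invocation above expands to roughly the following hand-written test, assuming the aho-corasick 0.7.x builder API vendored in this tree (match_kind, dfa, find_iter); the expected match triple is taken from the leftfirst000 case above:

// Sketch of one expanded test, against the vendored aho-corasick 0.7.x API.
use aho_corasick::{AhoCorasickBuilder, MatchKind};

fn main() {
    let ac = AhoCorasickBuilder::new()
        .match_kind(MatchKind::LeftmostFirst)
        .dfa(true) // the same knob the dfa_* test variants turn on
        .build(&["ab", "abcd"]);
    let ms: Vec<(usize, usize, usize)> = ac
        .find_iter("abcd")
        .map(|m| (m.pattern(), m.start(), m.end()))
        .collect();
    // Leftmost-first semantics: "ab" (pattern 0) wins over the longer "abcd".
    assert_eq!(ms, vec![(0, 0, 2)]);
}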
-fn run_search_tests<F: FnMut(&SearchTest) -> Vec<Match>>( - which: TestCollection, - mut f: F, -) { - let get_match_triples = - |matches: Vec<Match>| -> Vec<(usize, usize, usize)> { - matches - .into_iter() - .map(|m| (m.pattern(), m.start(), m.end())) - .collect() - }; - for &tests in which { - for test in tests { - assert_eq!( - test.matches, - get_match_triples(f(&test)).as_slice(), - "test: {}, patterns: {:?}, haystack: {:?}", - test.name, - test.patterns, - test.haystack - ); - } - } -} - -#[test] -fn search_tests_have_unique_names() { - let assert = |constname, tests: &[SearchTest]| { - let mut seen = HashMap::new(); // map from test name to position - for (i, test) in tests.iter().enumerate() { - if !seen.contains_key(test.name) { - seen.insert(test.name, i); - } else { - let last = seen[test.name]; - panic!( - "{} tests have duplicate names at positions {} and {}", - constname, last, i - ); - } - } - }; - assert("BASICS", BASICS); - assert("STANDARD", STANDARD); - assert("LEFTMOST", LEFTMOST); - assert("LEFTMOST_FIRST", LEFTMOST_FIRST); - assert("LEFTMOST_LONGEST", LEFTMOST_LONGEST); - assert("NON_OVERLAPPING", NON_OVERLAPPING); - assert("OVERLAPPING", OVERLAPPING); - assert("REGRESSION", REGRESSION); -} - -#[test] -#[should_panic] -fn stream_not_allowed_leftmost_first() { - let fsm = AhoCorasickBuilder::new() - .match_kind(MatchKind::LeftmostFirst) - .build(None::<String>); - assert_eq!(fsm.stream_find_iter(&b""[..]).count(), 0); -} - -#[test] -#[should_panic] -fn stream_not_allowed_leftmost_longest() { - let fsm = AhoCorasickBuilder::new() - .match_kind(MatchKind::LeftmostLongest) - .build(None::<String>); - assert_eq!(fsm.stream_find_iter(&b""[..]).count(), 0); -} - -#[test] -#[should_panic] -fn overlapping_not_allowed_leftmost_first() { - let fsm = AhoCorasickBuilder::new() - .match_kind(MatchKind::LeftmostFirst) - .build(None::<String>); - assert_eq!(fsm.find_overlapping_iter("").count(), 0); -} - -#[test] -#[should_panic] -fn overlapping_not_allowed_leftmost_longest() { - let fsm = AhoCorasickBuilder::new() - .match_kind(MatchKind::LeftmostLongest) - .build(None::<String>); - assert_eq!(fsm.find_overlapping_iter("").count(), 0); -} - -#[test] -fn state_id_too_small() { - let mut patterns = vec![]; - for c1 in (b'a'..b'z').map(|b| b as char) { - for c2 in (b'a'..b'z').map(|b| b as char) { - for c3 in (b'a'..b'z').map(|b| b as char) { - patterns.push(format!("{}{}{}", c1, c2, c3)); - } - } - } - let result = - AhoCorasickBuilder::new().build_with_size::<u8>(&patterns); - assert!(result.is_err()); -} -
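A back-of-the-envelope check of why state_id_too_small must fail: the loops build 25^3 patterns, and even a lower bound on the automaton's state count dwarfs what a u8 identifier can address (sketch only; the exact state count depends on prefix sharing):

// Rough count for the state_id_too_small test above.
fn main() {
    let letters = (b'a'..b'z').count(); // 25: the test's range is exclusive
    let patterns = letters * letters * letters;
    assert_eq!(patterns, 15_625);
    // Each distinct three-letter pattern needs its own final trie state,
    // so the automaton needs far more states than u8::MAX = 255 ids.
    assert!(patterns > u8::MAX as usize);
}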
-// See: https://github.com/BurntSushi/aho-corasick/issues/44 -// -// In short, this test ensures that enabling ASCII case insensitivity does not -// visit an exponential number of states when filling in failure transitions. -#[test] -fn regression_ascii_case_insensitive_no_exponential() { - let ac = AhoCorasickBuilder::new() - .ascii_case_insensitive(true) - .build(&["Tsubaki House-Triple Shot Vol01校花三姐妹"]); - assert!(ac.find("").is_none()); -} - -// See: https://github.com/BurntSushi/aho-corasick/issues/53 -// -// This test ensures that the rare byte prefilter works in a particular corner -// case. In particular, the shift offset detected for '/' in the patterns below -// was incorrect, leading to a false negative. -#[test] -fn regression_rare_byte_prefilter() { - use crate::AhoCorasick; - - let ac = AhoCorasick::new_auto_configured(&["ab/j/", "x/"]); - assert!(ac.is_match("ab/j/")); -} - -#[test] -fn regression_case_insensitive_prefilter() { - use crate::AhoCorasickBuilder; - - for c in b'a'..b'z' { - for c2 in b'a'..b'z' { - let c = c as char; - let c2 = c2 as char; - let needle = format!("{}{}", c, c2).to_lowercase(); - let haystack = needle.to_uppercase(); - let ac = AhoCorasickBuilder::new() - .ascii_case_insensitive(true) - .prefilter(true) - .build(&[&needle]); - assert_eq!( - 1, - ac.find_iter(&haystack).count(), - "failed to find {:?} in {:?}\n\nautomaton:\n{:?}", - needle, - haystack, - ac, - ); - } - } -} - -// See: https://github.com/BurntSushi/aho-corasick/issues/64 -// -// This occurs when the rare byte prefilter is active. -#[test] -fn regression_stream_rare_byte_prefilter() { - use std::io::Read; - - // NOTE: The test only fails if this ends with j. - const MAGIC: [u8; 5] = *b"1234j"; - - // NOTE: The test fails for values in 8188..=8191. These values put the - // string to search across two calls to read, because the buffer size is - // 8192 by default. - const BEGIN: usize = 8191; - - /// This is just a structure that implements Read. Its implementation - /// simulates a file filled with zeros, except for the MAGIC string at - /// offset BEGIN. - #[derive(Default)] - struct R { - read: usize, - } - - impl Read for R { - fn read(&mut self, buf: &mut [u8]) -> ::std::io::Result<usize> { - //dbg!(buf.len()); - if self.read > 100000 { - return Ok(0); - } - let mut from = 0; - if self.read < BEGIN { - from = buf.len().min(BEGIN - self.read); - for x in 0..from { - buf[x] = 0; - } - self.read += from; - } - if self.read >= BEGIN && self.read <= BEGIN + MAGIC.len() { - let to = buf.len().min(BEGIN + MAGIC.len() - self.read + from); - if to > from { - buf[from..to].copy_from_slice( - &MAGIC - [self.read - BEGIN..self.read - BEGIN + to - from], - ); - self.read += to - from; - from = to; - } - } - for x in from..buf.len() { - buf[x] = 0; - self.read += 1; - } - Ok(buf.len()) - } - } - - fn run() -> ::std::io::Result<()> { - let aut = AhoCorasickBuilder::new().build(&[&MAGIC]); - - // While reading from a vector, it works: - let mut buf = vec![]; - R::default().read_to_end(&mut buf)?; - let from_whole = aut.find_iter(&buf).next().unwrap().start(); - - // But using stream_find_iter fails! - let mut file = R::default(); - let begin = aut - .stream_find_iter(&mut file) - .next() - .expect("NOT FOUND!!!!")?
// Panic here - .start(); - assert_eq!(from_whole, begin); - Ok(()) - } - - run().unwrap() -} diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/bindgen/.cargo-checksum.json libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/bindgen/.cargo-checksum.json --- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/bindgen/.cargo-checksum.json 2022-11-23 18:55:06.000000000 +0000 +++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/bindgen/.cargo-checksum.json 2023-08-25 21:18:50.000000000 +0000 @@ -1 +1 @@ -{"files":{"Cargo.lock":"a915231b52b67320c7d440eb711c99632e4b948e5dcbeb6835e18bd0d798c76a","Cargo.toml":"655f82f7efb9e6b434a4710d8b1ea5b8c2116bccc6d8a4f87a7abc9e0c69051b","LICENSE":"c23953d9deb0a3312dbeaf6c128a657f3591acee45067612fa68405eaa4525db","README.md":"c093205492ab9f00f275c50aacfc9058264d3dcc7c7d2ff83e0cc4858d1cee49","build.rs":"d53484feea4cd147cd80280ac270c24ab727386acabb043e6347c44ac5369d0e","csmith-fuzzing/README.md":"7107b70fedb0c0a0cadb3c439a49c1bd0119a6d38dc63b1aecc74d1942256ef2","src/callbacks.rs":"cb4ca440e356dde75919a5298b75cbf145c981c2e1da62907337706286dd5c9e","src/clang.rs":"6b02ae174012372d00b442b5ec5a66a6122a091217039e5ba4917578c769d01f","src/codegen/bitfield_unit.rs":"fddeaeab5859f4e82081865595b7705f5c0774d997df95fa5c655b81b9cae125","src/codegen/bitfield_unit_tests.rs":"9df86490de5e9d66ccea583dcb686dd440375dc1a3c3cf89a89d5de3883bf28a","src/codegen/dyngen.rs":"b1bca96fbd81b1c0678122df8d28f3b60cd74047a43d0d298d69feb06eecf459","src/codegen/error.rs":"5e308b8c54b68511fc8ea2ad15ddac510172c4ff460a80a265336440b0c9653d","src/codegen/helpers.rs":"b4e2ee991e83fda62b0aebd562b948eba785179cb4aa1a154d00ffad215b7be5","src/codegen/impl_debug.rs":"71d8e28873ba2de466f2637a824746963702f0511728037d72ee5670c51194cb","src/codegen/impl_partialeq.rs":"f4599e32c66179ae515a6518a3e94b686689cf59f7dd9ab618c3fb69f17d2c77","src/codegen/mod.rs":"a286fa9a31254ce317c4baad05af446b59aaa23fb80aba9f260e67d15c64ff8c","src/codegen/struct_layout.rs":"d03e66412f4bb1fa59c623873b2a22e100d029a002c07aaf4586f4852a410b54","src/deps.rs":"de4a91d1d252295e1abaf4ab1f90f7be618c67649edb12081c3a501e61398a75","src/extra_assertions.rs":"494534bd4f18b80d89b180c8a93733e6617edcf7deac413e9a73fd6e7bc9ced7","src/features.rs":"f93bb757400580a75adc6a187cdeb032ec4d6efe7d3fcb9a6864472edd875580","src/ir/analysis/derive.rs":"066d35cdb7523c5edd141394286911128261b4db23cc17520e3b3111ef1bb51e","src/ir/analysis/has_destructor.rs":"7a82f01e7e0595a31b56f7c398fa3019b3fe9100a2a73b56768f7e6943dcc3ce","src/ir/analysis/has_float.rs":"58ea1e38a59ef208400fd65d426cb5b288949df2d383b3a194fa01b99d2a87fa","src/ir/analysis/has_type_param_in_array.rs":"d1b9eb119dc14f662eb9bd1394c859f485479e4912589709cdd33f6752094e22","src/ir/analysis/has_vtable.rs":"368cf30fbe3fab7190fab48718b948caac5da8c9e797b709488716b919315636","src/ir/analysis/mod.rs":"cde4ce0667d1895008c9b2af479211c828740fcb59fa13d600cbdc100fa8bdc5","src/ir/analysis/sizedness.rs":"944443d6aab35d2dd80e4f5e59176ac1e1c463ba2f0eb25d33f1d95dfac1a6d0","src/ir/analysis/template_params.rs":"a2d2e247c2f51cd90e83f11bce0305c2e498232d015f88192b44e8522e7fd8b1","src/ir/annotations.rs":"456276ef7f9b04e40b7b10aa7570d98b11aae8efe676679881459ae878bbecfc","src/ir/comment.rs":"9c0c4789c0893b636fac42228f8a0292a06cb4f2b7431895490784dd16b7f79a","src/ir/comp.rs":"811a2abfbf8ed6925327ad005a460ca698d40a2d5d4698015e1bcd4e7d2c9cf0","src/ir/context.rs":"df486590515ffaab8b51c96699a239de202569a8718d9c4b79a8ccc8808cee69","src/ir/derive.rs":"e5581852eec87918901a129284b4965aefc8a19394187a8095779a084f28fabe","src/ir/dot.rs":"2d79d698e6ac59ce0
32840e62ff11103abed1d5e9e700cf383b492333eeebe1f","src/ir/enum_ty.rs":"c2d928bb1a8453922c962cb11a7ab3b737c5651599141ece8d31e21e6eb74585","src/ir/function.rs":"3e13078b36ee02142017cfbbaaeb9e64ef485a12e151096e12f54a8fde984505","src/ir/int.rs":"68a86182743ec338d58e42203364dc7c8970cb7ec3550433ca92f0c9489b4442","src/ir/item.rs":"1c79d6dd400ab01545a19214847245b440690bfe129895f164bef460ee41b857","src/ir/item_kind.rs":"7666a1ff1b8260978b790a08b4139ab56b5c65714a5652bbcec7faa7443adc36","src/ir/layout.rs":"d6bd9a14b94320f9e2517bf9fc9ffaf4220954fa24d77d90bba070dbede7392b","src/ir/mod.rs":"713cd537434567003197a123cbae679602c715e976d22f7b23dafd0826ea4c70","src/ir/module.rs":"7cae5561bcf84a5c3b1ee8f1c3336a33b7f44f0d5ffe885fb108289983fe763e","src/ir/objc.rs":"dd394c1db6546cbe5111ce5cd2f211f9839aba81c5e7228c2a68fba386bc259f","src/ir/template.rs":"3bb3e7f6ec28eff73c2032922d30b93d70da117b848e9cb02bdf6c9a74294f7f","src/ir/traversal.rs":"105d93bc2f1f55033c621667a0e55153844eec34560ae88183f799d0d0c1a6f2","src/ir/ty.rs":"2ecae57f018732b6daf1c08fc98765456a9e6a24cbceaf7f1bc004676b0113ee","src/ir/var.rs":"fe7720438af43fa3bbe3850aff331bb47131b2c21e975f92bfbcdc182789105a","src/lib.rs":"0f148aef6fd6ae814df29317fe5860d0c1747c40d5182f2518d3b81a03b6587a","src/log_stubs.rs":"9f974e041e35c8c7e29985d27ae5cd0858d68f8676d1dc005c6388d7d011707f","src/main.rs":"188cd89581490eb5f26a194cc25e4f38f3e0b93eed7ad591bc73362febd26b72","src/options.rs":"14190fae2aaad331f0660e4cc1d5a1fea0c2c88696091715867a3e7282a1d7b5","src/parse.rs":"4ffc54415eadb622ee488603862788c78361ef2c889de25259441a340c2a010f","src/regex_set.rs":"6c46357fb1ee68250e5e017cbf691f340041489ae78599eee7a5665a6ddce27f","src/time.rs":"8efe317e7c6b5ba8e0865ce7b49ca775ee8a02590f4241ef62f647fa3c22b68e"},"package":"2bd2a9a458e8f4304c52c43ebb0cfbd520289f8379a52e329a38afda99bf8eb8"} \ No newline at end of file 
+{"files":{"Cargo.toml":"1c290771bddd3cde261935e253cd7574b648d1b321a0f3466d429eca3a3cce64","LICENSE":"c23953d9deb0a3312dbeaf6c128a657f3591acee45067612fa68405eaa4525db","build.rs":"4a9c4ac3759572e17de312a9d3f4ced3b6fd3c71811729e5a8d06bfbd1ac8f82","callbacks.rs":"985f5e3b19b870ec90baa89187b5049514fc5a259bc74fd6fb2ee857c52c11ff","clang.rs":"ee5130a029688f0eadc854c9873824330b6539e2eae597e2198b51e4e8f124a5","codegen/bitfield_unit.rs":"fddeaeab5859f4e82081865595b7705f5c0774d997df95fa5c655b81b9cae125","codegen/bitfield_unit_tests.rs":"9df86490de5e9d66ccea583dcb686dd440375dc1a3c3cf89a89d5de3883bf28a","codegen/dyngen.rs":"6d8bed53c6de66bc658b3186041c2b75549f49b0f0363ff18b87c8dcf2f5a05b","codegen/error.rs":"fa02274debd9064f35a627c43407e4e47ca89f2becfb1c233a500440d6c73e00","codegen/helpers.rs":"cf9e60d18d17d624f3559b6dd65e75630a16e6c1b71666f7c9656e51053d10f8","codegen/impl_debug.rs":"80df6136327b1ca8c7d1c2961290b5ab00b85b49b22c02f26a590bc68fb230af","codegen/impl_partialeq.rs":"db739d7ba6f5ba4033d6bf62c276f35217c20eab27230cf07dadf59e8b2f71bb","codegen/mod.rs":"89156a1926556d7c46b0266aabbb7c4e5a4a93fe1e5fc088f86acd3b14203f17","codegen/postprocessing/merge_extern_blocks.rs":"284457a3c75e945217bab4e5a4280fef0fcc03c31e12cc5010aab87f34c0b6c7","codegen/postprocessing/mod.rs":"160a6d6701cabf2514e23570df1bd1b648c909cc27b7c583f21d98fe0c16722e","codegen/postprocessing/sort_semantically.rs":"f465d1e8cc119082eb79c164b5cd780a370821e8bf56585b287dd3b51fc4a542","codegen/serialize.rs":"bb99633ab6a6764b84dac86a873fa64c90aa4979f26e75fbeff9af365b3fefa8","codegen/struct_layout.rs":"5685fc6caa24ac2779fbb885064043898830c00c92819e8c0e4fd9564c641c4d","deps.rs":"5ee2332fdb10325f3b0a0c6d9ba94e13eb631ef39e955fa958afc3625bdb5448","diagnostics.rs":"dc40cd5e9710922422c5c9420e2351f5d976e7a1d7275e4f4ce742cad9eb53f8","extra_assertions.rs":"494534bd4f18b80d89b180c8a93733e6617edcf7deac413e9a73fd6e7bc9ced7","features.rs":"6c17e37bdd14355c9c3f93b67e539bf001ea113a9efe287527e9021d785b5bda","ir/analysis/derive.rs":"cba290e9c4ba271e90524149ad3b874f37843bfdfab12d513cc85d2665447fd5","ir/analysis/has_destructor.rs":"e7e95c3b0989b6375cd3eabaac85a36ecc2915a1fd3700c7d26fe04e8dc83ba3","ir/analysis/has_float.rs":"a56b97bf913f132c2c63dc202b45c692c416a8c9fdc6b2baeee30362fb0d4405","ir/analysis/has_type_param_in_array.rs":"788ebb4ba2cf46a22f1e4ff3005d51f38d414b72e95355f7ff4125521e2d9525","ir/analysis/has_vtable.rs":"83efa40ae89147170eabdff1387e60aba574ca4cd4cdef22692753594f09d6c6","ir/analysis/mod.rs":"ed161d9f60306ad42af2ae70ff0eb686a36e2fb30eb94918b5e5f19af80e1db7","ir/analysis/sizedness.rs":"f0a9302f3c6ad694d76cfab11dbaf5392ecaf7f04bc7b211a5a003776b963896","ir/analysis/template_params.rs":"8f73a640cdd3b8e4e05fd5818eec8d36ba240ff131e8b785da3270c1335827a1","ir/annotations.rs":"eaacb6508b02d7d494bcaa50b9ba7acbe15f90f22130d3a24e2573909c08776f","ir/comment.rs":"4c9c20b5a3da086211e92adec0822831dbc0b7ebee98fee313edcae9ae8d55ec","ir/comp.rs":"fb32715ed8fc14bee51c344a41c1f7a8a802d4a6dceb2775034ea33a88670df7","ir/context.rs":"8b9f502e85ed563b46fc11eacb2e2140c19e7527dce4e31831cc9a571fbf87ff","ir/derive.rs":"c21e470bb0091f20bfa366110880d48984fc3cf7071fdf36eccfa64f3eca231c","ir/dot.rs":"75bdfd83d9e754ba726f6a5529ba1d9ff46f5bf49bf237452985eb008fed0854","ir/enum_ty.rs":"f4bfa6d18ba4977fb66f5d5e4a7674eded93b761404d91cdd6fdd50029db455a","ir/function.rs":"4cb04fbf40e8e8d4128c6182c84f21026b99446daf29ccba0871bedb275a5f81","ir/int.rs":"601736f0ad0949e40684a9ce89bafbfefa71743df6ee6c342e44888a0f141ae0","ir/item.rs":"5c0d0d2a7a327ac0c6ba1aadcef710b6d399c24bee3fbbd1ab6386e871c
44033","ir/item_kind.rs":"33e21104b0bb824a696a52cd520567ae56158010a1df14777e68ac5f8ad7e8fa","ir/layout.rs":"e704c9c8cd1532f9890a1c6b43e3b6e691565b6acc2a9ce07486a4674497f511","ir/mod.rs":"a3b98b1732111a980a795c72eaf1e09101e842ef2de76b4f2d4a7857f8d4cee4","ir/module.rs":"f82f380274e9adbab8017bc5e484a23d945e2cb7a97ce17c9cd2a2cfc505bb54","ir/objc.rs":"0f55ff60db706241634ed8396108ec84ecbec80e0cf28f68ab580c868e0e0cb4","ir/template.rs":"3f59efa9670ca90215d4374be869c9dbecb98a8d1041e7c6e4ab69a62bb982c2","ir/traversal.rs":"a4ec73d3533d4b93386153baf6a2ca846ee51228c76ed51105229d3ddcd74466","ir/ty.rs":"7e479d601229619cf39073fc3570f4211666cc042a60ab27c810bdde0e5d5690","ir/var.rs":"40d18226706de0ee5f002d0b5617dbcba35de0605edd531c75e3a76d000f0f4f","lib.rs":"ef2927a0a84d50b6bea44d9e95f69d2dc9fc7bc75aff8fc3a5edd2919613a81c","log_stubs.rs":"9f974e041e35c8c7e29985d27ae5cd0858d68f8676d1dc005c6388d7d011707f","options/as_args.rs":"3b3547e08f0cb72fa042cde417bbc8760166d11dc0db4812e7a280c93074d2f5","options/helpers.rs":"f4a7681e29b2dcc3be9249478c499d685b9e29d4f4ca4ae8bff7a91668cd8f15","options/mod.rs":"f06194a21bf5b4a7039d1be80e5b0b3e4a310f48084a6e2b7abbb1539d0c2004","parse.rs":"fce3616e0464aa7414888e5d00d4df18c83bb3034a1c807d36a07a3c586e475a","regex_set.rs":"8b38dce6b4b34712f7eafcb2817024de18fccf0cead0c175de34f78ea4027545","time.rs":"8efe317e7c6b5ba8e0865ce7b49ca775ee8a02590f4241ef62f647fa3c22b68e"},"package":"cfdf7b466f9a4903edc73f95d6d2bcd5baf8ae620638762244d3f60143643cc5"} \ No newline at end of file diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/bindgen/Cargo.lock libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/bindgen/Cargo.lock --- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/bindgen/Cargo.lock 2022-11-23 18:55:06.000000000 +0000 +++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/bindgen/Cargo.lock 1970-01-01 00:00:00.000000000 +0000 @@ -1,446 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. 
-version = 3 - -[[package]] -name = "aho-corasick" -version = "0.7.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5" -dependencies = [ - "memchr", -] - -[[package]] -name = "ansi_term" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" -dependencies = [ - "winapi", -] - -[[package]] -name = "atty" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" -dependencies = [ - "hermit-abi", - "libc", - "winapi", -] - -[[package]] -name = "bindgen" -version = "0.59.2" -dependencies = [ - "bitflags", - "cexpr", - "clang-sys", - "clap", - "diff", - "env_logger", - "lazy_static", - "lazycell", - "log", - "peeking_take_while", - "proc-macro2", - "quote", - "regex", - "rustc-hash", - "shlex", - "tempfile", - "which", -] - -[[package]] -name = "bitflags" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" - -[[package]] -name = "cexpr" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" -dependencies = [ - "nom", -] - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "clang-sys" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "853eda514c284c2287f4bf20ae614f8781f40a81d32ecda6e91449304dfe077c" -dependencies = [ - "glob", - "libc", - "libloading", -] - -[[package]] -name = "clap" -version = "2.33.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002" -dependencies = [ - "ansi_term", - "atty", - "bitflags", - "strsim", - "textwrap", - "unicode-width", - "vec_map", -] - -[[package]] -name = "diff" -version = "0.1.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e25ea47919b1560c4e3b7fe0aaab9becf5b84a10325ddf7db0f0ba5e1026499" - -[[package]] -name = "either" -version = "1.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" - -[[package]] -name = "env_logger" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b2cf0344971ee6c64c31be0d530793fba457d322dfec2810c453d0ef228f9c3" -dependencies = [ - "atty", - "humantime", - "log", - "regex", - "termcolor", -] - -[[package]] -name = "getrandom" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753" -dependencies = [ - "cfg-if", - "libc", - "wasi", -] - -[[package]] -name = "glob" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" - -[[package]] -name = "hermit-abi" -version = "0.1.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" 
-dependencies = [ - "libc", -] - -[[package]] -name = "humantime" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" - -[[package]] -name = "lazy_static" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" - -[[package]] -name = "lazycell" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" - -[[package]] -name = "libc" -version = "0.2.98" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "320cfe77175da3a483efed4bc0adc1968ca050b098ce4f2f1c13a56626128790" - -[[package]] -name = "libloading" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f84d96438c15fcd6c3f244c8fce01d1e2b9c6b5623e9c711dc9286d8fc92d6a" -dependencies = [ - "cfg-if", - "winapi", -] - -[[package]] -name = "log" -version = "0.4.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "memchr" -version = "2.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525" - -[[package]] -name = "minimal-lexical" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c64630dcdd71f1a64c435f54885086a0de5d6a12d104d69b165fb7d5286d677" - -[[package]] -name = "nom" -version = "7.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ffd9d26838a953b4af82cbeb9f1592c6798916983959be223a7124e992742c1" -dependencies = [ - "memchr", - "minimal-lexical", - "version_check", -] - -[[package]] -name = "peeking_take_while" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" - -[[package]] -name = "ppv-lite86" -version = "0.2.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857" - -[[package]] -name = "proc-macro2" -version = "1.0.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c7ed8b8c7b886ea3ed7dde405212185f423ab44682667c8c6dd14aa1d9f6612" -dependencies = [ - "unicode-xid", -] - -[[package]] -name = "quote" -version = "1.0.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "rand" -version = "0.8.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e7573632e6454cf6b99d7aac4ccca54be06da05aca2ef7423d22d27d4d4bcd8" -dependencies = [ - "libc", - "rand_chacha", - "rand_core", - "rand_hc", -] - -[[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", - "rand_core", -] - -[[package]] -name = "rand_core" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" -dependencies = [ - "getrandom", -] - -[[package]] -name = "rand_hc" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d51e9f596de227fda2ea6c84607f5558e196eeaf43c986b724ba4fb8fdf497e7" -dependencies = [ - "rand_core", -] - -[[package]] -name = "redox_syscall" -version = "0.2.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ab49abadf3f9e1c4bc499e8845e152ad87d2ad2d30371841171169e9d75feee" -dependencies = [ - "bitflags", -] - -[[package]] -name = "regex" -version = "1.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a26af418b574bd56588335b3a3659a65725d4e636eb1016c2f9e3b38c7cc759" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax", -] - -[[package]] -name = "regex-syntax" -version = "0.6.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" - -[[package]] -name = "remove_dir_all" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" -dependencies = [ - "winapi", -] - -[[package]] -name = "rustc-hash" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" - -[[package]] -name = "shlex" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42a568c8f2cd051a4d283bd6eb0343ac214c1b0f1ac19f93e1175b2dee38c73d" - -[[package]] -name = "strsim" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" - -[[package]] -name = "tempfile" -version = "3.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dac1c663cfc93810f88aed9b8941d48cabf856a1b111c29a40439018d870eb22" -dependencies = [ - "cfg-if", - "libc", - "rand", - "redox_syscall", - "remove_dir_all", - "winapi", -] - -[[package]] -name = "termcolor" -version = "1.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dfed899f0eb03f32ee8c6a0aabdb8a7949659e3466561fc0adf54e26d88c5f4" -dependencies = [ - "winapi-util", -] - -[[package]] -name = "textwrap" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" -dependencies = [ - "unicode-width", -] - -[[package]] -name = "unicode-width" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3" - -[[package]] -name = "unicode-xid" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" - -[[package]] -name = "vec_map" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" - -[[package]] -name = "version_check" -version = "0.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe" - -[[package]] -name = "wasi" -version = "0.10.2+wasi-snapshot-preview1" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" - -[[package]] -name = "which" -version = "4.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cc009ab82a2afc94b9e467ab4214aee9cad1356cd9191264203d7d72006e00d" -dependencies = [ - "either", - "lazy_static", - "libc", -] - -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-util" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" -dependencies = [ - "winapi", -] - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/bindgen/Cargo.toml libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/bindgen/Cargo.toml --- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/bindgen/Cargo.toml 2022-11-23 18:55:06.000000000 +0000 +++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/bindgen/Cargo.toml 2023-08-25 21:18:50.000000000 +0000 @@ -11,28 +11,41 @@ [package] edition = "2018" +rust-version = "1.60.0" name = "bindgen" -version = "0.59.2" -authors = ["Jyun-Yan You ", "Emilio Cobos Álvarez ", "Nick Fitzgerald ", "The Servo project developers"] +version = "0.65.1" +authors = [ + "Jyun-Yan You ", + "Emilio Cobos Álvarez ", + "Nick Fitzgerald ", + "The Servo project developers", +] build = "build.rs" -include = ["LICENSE", "README.md", "Cargo.toml", "build.rs", "src/*.rs", "src/**/*.rs"] description = "Automatically generates Rust FFI bindings to C and C++ libraries." 
homepage = "https://rust-lang.github.io/rust-bindgen/" documentation = "https://docs.rs/bindgen" -readme = "README.md" -keywords = ["bindings", "ffi", "code-generation"] -categories = ["external-ffi-bindings", "development-tools::ffi"] +readme = "../README.md" +keywords = [ + "bindings", + "ffi", + "code-generation", +] +categories = [ + "external-ffi-bindings", + "development-tools::ffi", +] license = "BSD-3-Clause" repository = "https://github.com/rust-lang/rust-bindgen" [lib] -path = "src/lib.rs" - -[[bin]] name = "bindgen" -path = "src/main.rs" -doc = false -required-features = ["clap"] +path = "lib.rs" + +[dependencies.annotate-snippets] +version = "0.9.1" +features = ["color"] +optional = true + [dependencies.bitflags] version = "1.0.3" @@ -43,14 +56,6 @@ version = "1" features = ["clang_6_0"] -[dependencies.clap] -version = "2" -optional = true - -[dependencies.env_logger] -version = "0.9.0" -optional = true - [dependencies.lazy_static] version = "1" @@ -64,6 +69,9 @@ [dependencies.peeking_take_while] version = "0.1.2" +[dependencies.prettyplease] +version = "0.2.0" + [dependencies.proc-macro2] version = "1" default-features = false @@ -73,8 +81,11 @@ default-features = false [dependencies.regex] -version = "1.0" -features = ["std", "unicode"] +version = "1.5" +features = [ + "std", + "unicode", +] default-features = false [dependencies.rustc-hash] @@ -83,33 +94,31 @@ [dependencies.shlex] version = "1" +[dependencies.syn] +version = "2.0" +features = [ + "full", + "extra-traits", + "visit-mut", +] + [dependencies.which] version = "4.2.1" optional = true default-features = false -[dev-dependencies.clap] -version = "2" - -[dev-dependencies.diff] -version = "0.1" - -[dev-dependencies.shlex] -version = "1" - -[dev-dependencies.tempfile] -version = "3" [features] -default = ["logging", "clap", "runtime", "which-rustfmt"] -logging = ["env_logger", "log"] +__cli = [] +default = [ + "logging", + "runtime", + "which-rustfmt", +] +experimental = ["annotate-snippets"] +logging = ["log"] runtime = ["clang-sys/runtime"] static = ["clang-sys/static"] -testing_only_docs = [] testing_only_extra_assertions = [] -testing_only_libclang_3_9 = [] -testing_only_libclang_4 = [] testing_only_libclang_5 = [] testing_only_libclang_9 = [] which-rustfmt = ["which"] -[badges.travis-ci] -repository = "rust-lang/rust-bindgen" diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/bindgen/README.md libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/bindgen/README.md --- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/bindgen/README.md 2022-11-23 18:55:06.000000000 +0000 +++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/bindgen/README.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,83 +0,0 @@ -[![crates.io](https://img.shields.io/crates/v/bindgen.svg)](https://crates.io/crates/bindgen) -[![docs.rs](https://docs.rs/bindgen/badge.svg)](https://docs.rs/bindgen/) - -# `bindgen` - -**`bindgen` automatically generates Rust FFI bindings to C (and some C++) libraries.** - -For example, given the C header `doggo.h`: - -```c -typedef struct Doggo { - int many; - char wow; -} Doggo; - -void eleven_out_of_ten_majestic_af(Doggo* pupper); -``` - -`bindgen` produces Rust FFI code allowing you to call into the `doggo` library's -functions and use its types: - -```rust -/* automatically generated by rust-bindgen 0.99.9 */ - -#[repr(C)] -pub struct Doggo { - pub many: ::std::os::raw::c_int, - pub wow: ::std::os::raw::c_char, -} - -extern "C" { - pub fn eleven_out_of_ten_majestic_af(pupper: *mut Doggo); -} -``` - -## Users Guide - -[📚 
Read the `bindgen` users guide here! 📚](https://rust-lang.github.io/rust-bindgen) - -## MSRV - -The minimum supported Rust version is **1.46**. - -No MSRV bump policy has been established yet, so MSRV may increase in any release. - -## API Reference - -[API reference documentation is on docs.rs](https://docs.rs/bindgen) - -## Environment Variables - -In addition to the [library API](https://docs.rs/bindgen) and [executable command-line API][bindgen-cmdline], -`bindgen` can be controlled through environment variables. - -End-users should set these environment variables to modify `bindgen`'s behavior without modifying the source code of direct consumers of `bindgen`. - -- `BINDGEN_EXTRA_CLANG_ARGS`: extra arguments to pass to `clang` - - Arguments are whitespace-separated - - Use shell-style quoting to pass through whitespace - - Examples: - - Specify alternate sysroot: `--sysroot=/path/to/sysroot` - - Add include search path with spaces: `-I"/path/with spaces"` -- `BINDGEN_EXTRA_CLANG_ARGS_<TARGET>`: similar to `BINDGEN_EXTRA_CLANG_ARGS`, - but used to set per-target arguments to pass to clang. Useful to set system include - directories in a target-specific way in cross-compilation environments with multiple targets. - Has precedence over `BINDGEN_EXTRA_CLANG_ARGS`. - -Additionally, `bindgen` uses `libclang` to parse C and C++ header files. -To modify how `bindgen` searches for `libclang`, see the [`clang-sys` documentation][clang-sys-env]. -For more details on how `bindgen` uses `libclang`, see the [`bindgen` users guide][bindgen-book-clang]. - -## Releases - -We don't follow a specific release calendar, but if you need a release please -file an issue requesting that (ping `@emilio` for increased effectiveness). - -## Contributing - -[See `CONTRIBUTING.md` for hacking on `bindgen`!](./CONTRIBUTING.md) - -[bindgen-cmdline]: https://rust-lang.github.io/rust-bindgen/command-line-usage.html -[clang-sys-env]: https://github.com/KyleMayes/clang-sys#environment-variables -[bindgen-book-clang]: https://rust-lang.github.io/rust-bindgen/requirements.html#clang diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/bindgen/build.rs libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/bindgen/build.rs --- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/bindgen/build.rs 2022-11-23 18:55:06.000000000 +0000 +++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/bindgen/build.rs 2023-08-25 21:18:50.000000000 +0000 @@ -1,76 +1,15 @@ -mod target { - use std::env; - use std::fs::File; - use std::io::Write; - use std::path::{Path, PathBuf}; - - pub fn main() { - let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap()); - - let mut dst = - File::create(Path::new(&out_dir).join("host-target.txt")).unwrap(); - dst.write_all(env::var("TARGET").unwrap().as_bytes()) - .unwrap(); - } -} - -mod testgen { - use std::char; - use std::env; - use std::ffi::OsStr; - use std::fs::{self, File}; - use std::io::Write; - use std::path::{Path, PathBuf}; - - pub fn main() { - let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap()); - let mut dst = - File::create(Path::new(&out_dir).join("tests.rs")).unwrap(); - - let manifest_dir = - PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); - let headers_dir = manifest_dir.join("tests").join("headers"); - - let headers = match fs::read_dir(headers_dir) { - Ok(dir) => dir, - // We may not have headers directory after packaging. - Err(..)
=> return, - }; - - let entries = - headers.map(|result| result.expect("Couldn't read header file")); - - println!("cargo:rerun-if-changed=tests/headers"); - - for entry in entries { - match entry.path().extension().and_then(OsStr::to_str) { - Some("h") | Some("hpp") => { - let func = entry - .file_name() - .to_str() - .unwrap() - .replace(|c| !char::is_alphanumeric(c), "_") - .replace("__", "_") - .to_lowercase(); - writeln!( - dst, - "test_header!(header_{}, {:?});", - func, - entry.path(), - ) - .unwrap(); - } - _ => {} - } - } - - dst.flush().unwrap(); - } -} +use std::env; +use std::fs::File; +use std::io::Write; +use std::path::{Path, PathBuf}; fn main() { - target::main(); - testgen::main(); + let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap()); + + let mut dst = + File::create(Path::new(&out_dir).join("host-target.txt")).unwrap(); + dst.write_all(env::var("TARGET").unwrap().as_bytes()) + .unwrap(); // On behalf of clang_sys, rebuild ourselves if important configuration // variables change, to ensure that bindings get rebuilt if the @@ -85,6 +24,6 @@ ); println!( "cargo:rerun-if-env-changed=BINDGEN_EXTRA_CLANG_ARGS_{}", - std::env::var("TARGET").unwrap().replace("-", "_") + std::env::var("TARGET").unwrap().replace('-', "_") ); } diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/bindgen/callbacks.rs libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/bindgen/callbacks.rs --- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/bindgen/callbacks.rs 1970-01-01 00:00:00.000000000 +0000 +++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/bindgen/callbacks.rs 2023-08-25 21:18:50.000000000 +0000 @@ -0,0 +1,178 @@ +//! A public API for more fine-grained customization of bindgen behavior. + +pub use crate::ir::analysis::DeriveTrait; +pub use crate::ir::derive::CanDerive as ImplementsTrait; +pub use crate::ir::enum_ty::{EnumVariantCustomBehavior, EnumVariantValue}; +pub use crate::ir::int::IntKind; +use std::fmt; + +/// An enum to allow ignoring parsing of macros. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum MacroParsingBehavior { + /// Ignore the macro, generating no code for it, or anything that depends on + /// it. + Ignore, + /// The default behavior bindgen would have otherwise. + Default, +} + +impl Default for MacroParsingBehavior { + fn default() -> Self { + MacroParsingBehavior::Default + } +} + +/// A trait to allow configuring different kinds of types in different +/// situations. +pub trait ParseCallbacks: fmt::Debug { + #[cfg(feature = "__cli")] + #[doc(hidden)] + fn cli_args(&self) -> Vec<String> { + vec![] + } + + /// This function will be run on every macro that is identified. + fn will_parse_macro(&self, _name: &str) -> MacroParsingBehavior { + MacroParsingBehavior::Default + } + + /// This function will run for every extern variable and function. The returned value determines + /// the name visible in the bindings. + fn generated_name_override( + &self, + _item_info: ItemInfo<'_>, + ) -> Option<String> { + None + } + + /// This function will run for every extern variable and function. The returned value determines + /// the link name in the bindings. + fn generated_link_name_override( + &self, + _item_info: ItemInfo<'_>, + ) -> Option<String> { + None + } + + /// The integer kind an integer macro should have, given a name and the + /// value of that macro, or `None` if you want the default to be chosen. + fn int_macro(&self, _name: &str, _value: i64) -> Option<IntKind> { + None + } + + /// This will be run on every string macro.
The callback cannot influence the further + /// treatment of the macro, but may use the value to generate additional code or configuration. + fn str_macro(&self, _name: &str, _value: &[u8]) {} + + /// This will be run on every function-like macro. The callback cannot + /// influence the further treatment of the macro, but may use the value to + /// generate additional code or configuration. + /// + /// The first parameter represents the name and argument list (including the + /// parentheses) of the function-like macro. The second parameter represents + /// the expansion of the macro as a sequence of tokens. + fn func_macro(&self, _name: &str, _value: &[&[u8]]) {} + + /// This function should return whether, given an enum variant + /// name and value, this enum variant will forcibly be a constant. + fn enum_variant_behavior( + &self, + _enum_name: Option<&str>, + _original_variant_name: &str, + _variant_value: EnumVariantValue, + ) -> Option<EnumVariantCustomBehavior> { + None + } + + /// Allows renaming an enum variant, replacing `_original_variant_name`. + fn enum_variant_name( + &self, + _enum_name: Option<&str>, + _original_variant_name: &str, + _variant_value: EnumVariantValue, + ) -> Option<String> { + None + } + + /// Allows renaming an item, replacing `_original_item_name`. + fn item_name(&self, _original_item_name: &str) -> Option<String> { + None + } + + /// This will be called on every file inclusion, with the full path of the included file. + fn include_file(&self, _filename: &str) {} + + /// This will be called every time `bindgen` reads an environment variable, whether it has any + /// content or not. + fn read_env_var(&self, _key: &str) {} + + /// This will be called to determine whether a particular blocklisted type + /// implements a trait or not. This will be used to implement traits on + /// other types containing the blocklisted type. + /// + /// * `None`: use the default behavior + /// * `Some(ImplementsTrait::Yes)`: `_name` implements `_derive_trait` + /// * `Some(ImplementsTrait::Manually)`: any type including `_name` can't + /// derive `_derive_trait` but can implement it manually + /// * `Some(ImplementsTrait::No)`: `_name` doesn't implement `_derive_trait` + fn blocklisted_type_implements_trait( + &self, + _name: &str, + _derive_trait: DeriveTrait, + ) -> Option<ImplementsTrait> { + None + } + + /// Provide a list of custom derive attributes. + /// + /// If no additional attributes are wanted, this function should return an + /// empty `Vec`. + fn add_derives(&self, _info: &DeriveInfo<'_>) -> Vec<String> { + vec![] + } + + /// Process a source code comment. + fn process_comment(&self, _comment: &str) -> Option<String> { + None + } +} + +/// Relevant information about a type to which new derive attributes will be added using +/// [`ParseCallbacks::add_derives`]. +#[derive(Debug)] +#[non_exhaustive] +pub struct DeriveInfo<'a> { + /// The name of the type. + pub name: &'a str, + /// The kind of the type. + pub kind: TypeKind, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +/// The kind of the current type. +pub enum TypeKind { + /// The type is a Rust `struct`. + Struct, + /// The type is a Rust `enum`. + Enum, + /// The type is a Rust `union`. + Union, +} + +/// A struct providing information about the item being passed to [`ParseCallbacks::generated_name_override`]. +#[non_exhaustive] +pub struct ItemInfo<'a> { + /// The name of the item + pub name: &'a str, + /// The kind of item + pub kind: ItemKind, +} + +/// An enum indicating the kind of item for an ItemInfo.
+#[non_exhaustive] +pub enum ItemKind { + /// A Function + Function, + /// A Variable + Var, +} diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/bindgen/clang.rs libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/bindgen/clang.rs --- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/bindgen/clang.rs 1970-01-01 00:00:00.000000000 +0000 +++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/bindgen/clang.rs 2023-08-25 21:18:50.000000000 +0000 @@ -0,0 +1,2236 @@ +//! A higher level Clang API built on top of the generated bindings in the +//! `clang_sys` module. + +#![allow(non_upper_case_globals, dead_code)] +#![deny(clippy::missing_docs_in_private_items)] + +use crate::ir::context::BindgenContext; +use clang_sys::*; +use std::ffi::{CStr, CString}; +use std::fmt; +use std::hash::Hash; +use std::hash::Hasher; +use std::os::raw::{c_char, c_int, c_longlong, c_uint, c_ulong, c_ulonglong}; +use std::{mem, ptr, slice}; + +/// Type representing a clang attribute. +/// +/// Values of this type can be used to check for different attributes using the `has_attrs` +/// function. +pub(crate) struct Attribute { + name: &'static [u8], + kind: Option<CXCursorKind>, + token_kind: CXTokenKind, +} + +impl Attribute { + /// A `warn_unused_result` attribute. + pub(crate) const MUST_USE: Self = Self { + name: b"warn_unused_result", + // FIXME(emilio): clang-sys doesn't expose `CXCursor_WarnUnusedResultAttr` (from clang 9). + kind: Some(440), + token_kind: CXToken_Identifier, + }; + + /// A `_Noreturn` attribute. + pub(crate) const NO_RETURN: Self = Self { + name: b"_Noreturn", + kind: None, + token_kind: CXToken_Keyword, + }; + + /// A `[[noreturn]]` attribute. + pub(crate) const NO_RETURN_CPP: Self = Self { + name: b"noreturn", + kind: None, + token_kind: CXToken_Identifier, + }; +} + +/// A cursor into the Clang AST, pointing to an AST node. +/// +/// We call the AST node pointed to by the cursor the cursor's "referent". +#[derive(Copy, Clone)] +pub(crate) struct Cursor { + x: CXCursor, +} + +impl fmt::Debug for Cursor { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!( + fmt, + "Cursor({} kind: {}, loc: {}, usr: {:?})", + self.spelling(), + kind_to_str(self.kind()), + self.location(), + self.usr() + ) + } +} + +impl Cursor { + /// Get the Unified Symbol Resolution for this cursor's referent, if + /// available. + /// + /// The USR can be used to compare entities across translation units. + pub(crate) fn usr(&self) -> Option<String> { + let s = unsafe { cxstring_into_string(clang_getCursorUSR(self.x)) }; + if s.is_empty() { + None + } else { + Some(s) + } + } + + /// Is this cursor's referent a declaration? + pub(crate) fn is_declaration(&self) -> bool { + unsafe { clang_isDeclaration(self.kind()) != 0 } + } + + /// Is this cursor's referent an anonymous record or so? + pub(crate) fn is_anonymous(&self) -> bool { + unsafe { clang_Cursor_isAnonymous(self.x) != 0 } + } + + /// Get this cursor's referent's spelling. + pub(crate) fn spelling(&self) -> String { + unsafe { cxstring_into_string(clang_getCursorSpelling(self.x)) } + } + + /// Get this cursor's referent's display name. + /// + /// This is not necessarily a valid identifier. It includes extra + /// information, such as parameters for a function, etc. + pub(crate) fn display_name(&self) -> String { + unsafe { cxstring_into_string(clang_getCursorDisplayName(self.x)) } + } + + /// Get the mangled name of this cursor's referent.
+ pub(crate) fn mangling(&self) -> String { + unsafe { cxstring_into_string(clang_Cursor_getMangling(self.x)) } + } + + /// Gets the C++ manglings for this cursor, or an error if the manglings + /// are not available. + pub(crate) fn cxx_manglings(&self) -> Result<Vec<String>, ()> { + use clang_sys::*; + unsafe { + let manglings = clang_Cursor_getCXXManglings(self.x); + if manglings.is_null() { + return Err(()); + } + let count = (*manglings).Count as usize; + + let mut result = Vec::with_capacity(count); + for i in 0..count { + let string_ptr = (*manglings).Strings.add(i); + result.push(cxstring_to_string_leaky(*string_ptr)); + } + clang_disposeStringSet(manglings); + Ok(result) + } + } + + /// Returns whether the cursor refers to a built-in definition. + pub(crate) fn is_builtin(&self) -> bool { + let (file, _, _, _) = self.location().location(); + file.name().is_none() + } + + /// Get the `Cursor` for this cursor's referent's lexical parent. + /// + /// The lexical parent is the parent of the definition. The semantic parent + /// is the parent of the declaration. Generally, the lexical parent doesn't + /// have any effect on semantics, while the semantic parent does. + /// + /// In the following snippet, the `Foo` class would be the semantic parent + /// of the out-of-line `method` definition, while the lexical parent is the + /// translation unit. + /// + /// ```c++ + /// class Foo { + /// void method(); + /// }; + /// + /// void Foo::method() { /* ... */ } + /// ``` + pub(crate) fn lexical_parent(&self) -> Cursor { + unsafe { + Cursor { + x: clang_getCursorLexicalParent(self.x), + } + } + } + + /// Get the referent's semantic parent, if one is available. + /// + /// See documentation for `lexical_parent` for details on semantic vs + /// lexical parents. + pub(crate) fn fallible_semantic_parent(&self) -> Option<Cursor> { + let sp = unsafe { + Cursor { + x: clang_getCursorSemanticParent(self.x), + } + }; + if sp == *self || !sp.is_valid() { + return None; + } + Some(sp) + } + + /// Get the referent's semantic parent. + /// + /// See documentation for `lexical_parent` for details on semantic vs + /// lexical parents. + pub(crate) fn semantic_parent(&self) -> Cursor { + self.fallible_semantic_parent().unwrap() + } + + /// Return the number of template arguments used by this cursor's referent, + /// if the referent is a template instantiation. Returns `None` + /// otherwise. + /// + /// NOTE: This may not return `Some` for partial template specializations, + /// see #193 and #194. + pub(crate) fn num_template_args(&self) -> Option<u32> { + // XXX: `clang_Type_getNumTemplateArguments` is sort of reliable, while + // `clang_Cursor_getNumTemplateArguments` is totally unreliable. + // Therefore, try the former first, and only fall back to the latter if we + // have to. + self.cur_type() + .num_template_args() + .or_else(|| { + let n: c_int = + unsafe { clang_Cursor_getNumTemplateArguments(self.x) }; + + if n >= 0 { + Some(n as u32) + } else { + debug_assert_eq!(n, -1); + None + } + }) + .or_else(|| { + let canonical = self.canonical(); + if canonical != *self { + canonical.num_template_args() + } else { + None + } + }) + } + + /// Get a cursor pointing to this referent's containing translation unit. + /// + /// Note that we shouldn't create a `TranslationUnit` struct here, because + /// bindgen assumes there will only be one of them alive at a time, and + /// disposes it on drop. That can change if this would be required, but I + /// think we can survive fine without it.
+ pub(crate) fn translation_unit(&self) -> Cursor { + assert!(self.is_valid()); + unsafe { + let tu = clang_Cursor_getTranslationUnit(self.x); + let cursor = Cursor { + x: clang_getTranslationUnitCursor(tu), + }; + assert!(cursor.is_valid()); + cursor + } + } + + /// Is the referent a top level construct? + pub(crate) fn is_toplevel(&self) -> bool { + let mut semantic_parent = self.fallible_semantic_parent(); + + while semantic_parent.is_some() && + (semantic_parent.unwrap().kind() == CXCursor_Namespace || + semantic_parent.unwrap().kind() == + CXCursor_NamespaceAlias || + semantic_parent.unwrap().kind() == CXCursor_NamespaceRef) + { + semantic_parent = + semantic_parent.unwrap().fallible_semantic_parent(); + } + + let tu = self.translation_unit(); + // Yes, this can happen with, e.g., macro definitions. + semantic_parent == tu.fallible_semantic_parent() + } + + /// There are a few kinds of types that we need to treat specially, mainly + /// not tracking the type declaration but the location of the cursor, given + /// clang doesn't expose a proper declaration for these types. + pub(crate) fn is_template_like(&self) -> bool { + matches!( + self.kind(), + CXCursor_ClassTemplate | + CXCursor_ClassTemplatePartialSpecialization | + CXCursor_TypeAliasTemplateDecl + ) + } + + /// Is this Cursor pointing to a function-like macro definition? + pub(crate) fn is_macro_function_like(&self) -> bool { + unsafe { clang_Cursor_isMacroFunctionLike(self.x) != 0 } + } + + /// Get the kind of referent this cursor is pointing to. + pub(crate) fn kind(&self) -> CXCursorKind { + self.x.kind + } + + /// Returns true if the cursor is a definition + pub(crate) fn is_definition(&self) -> bool { + unsafe { clang_isCursorDefinition(self.x) != 0 } + } + + /// Is the referent a template specialization? + pub(crate) fn is_template_specialization(&self) -> bool { + self.specialized().is_some() + } + + /// Is the referent a fully specialized template specialization without any + /// remaining free template arguments? + pub(crate) fn is_fully_specialized_template(&self) -> bool { + self.is_template_specialization() && + self.kind() != CXCursor_ClassTemplatePartialSpecialization && + self.num_template_args().unwrap_or(0) > 0 + } + + /// Is the referent a template specialization that still has remaining free + /// template arguments? + pub(crate) fn is_in_non_fully_specialized_template(&self) -> bool { + if self.is_toplevel() { + return false; + } + + let parent = self.semantic_parent(); + if parent.is_fully_specialized_template() { + return false; + } + + if !parent.is_template_like() { + return parent.is_in_non_fully_specialized_template(); + } + + true + } + + /// Is the referent any kind of template parameter? + pub(crate) fn is_template_parameter(&self) -> bool { + matches!( + self.kind(), + CXCursor_TemplateTemplateParameter | + CXCursor_TemplateTypeParameter | + CXCursor_NonTypeTemplateParameter + ) + } + + /// Does the referent's type or value depend on a template parameter? + pub(crate) fn is_dependent_on_template_parameter(&self) -> bool { + fn visitor( + found_template_parameter: &mut bool, + cur: Cursor, + ) -> CXChildVisitResult { + // If we found a template parameter, it is dependent. + if cur.is_template_parameter() { + *found_template_parameter = true; + return CXChildVisit_Break; + } + + // Get the referent and traverse it as well. 
+ if let Some(referenced) = cur.referenced() { + if referenced.is_template_parameter() { + *found_template_parameter = true; + return CXChildVisit_Break; + } + + referenced + .visit(|next| visitor(found_template_parameter, next)); + if *found_template_parameter { + return CXChildVisit_Break; + } + } + + // Continue traversing the AST at the original cursor. + CXChildVisit_Recurse + } + + if self.is_template_parameter() { + return true; + } + + let mut found_template_parameter = false; + self.visit(|next| visitor(&mut found_template_parameter, next)); + + found_template_parameter + } + + /// Is this cursor pointing to a valid referent? + pub(crate) fn is_valid(&self) -> bool { + unsafe { clang_isInvalid(self.kind()) == 0 } + } + + /// Get the source location for the referent. + pub(crate) fn location(&self) -> SourceLocation { + unsafe { + SourceLocation { + x: clang_getCursorLocation(self.x), + } + } + } + + /// Get the source location range for the referent. + pub(crate) fn extent(&self) -> CXSourceRange { + unsafe { clang_getCursorExtent(self.x) } + } + + /// Get the raw declaration comment for this referent, if one exists. + pub(crate) fn raw_comment(&self) -> Option<String> { + let s = unsafe { + cxstring_into_string(clang_Cursor_getRawCommentText(self.x)) + }; + if s.is_empty() { + None + } else { + Some(s) + } + } + + /// Get the referent's parsed comment. + pub(crate) fn comment(&self) -> Comment { + unsafe { + Comment { + x: clang_Cursor_getParsedComment(self.x), + } + } + } + + /// Get the referent's type. + pub(crate) fn cur_type(&self) -> Type { + unsafe { + Type { + x: clang_getCursorType(self.x), + } + } + } + + /// Given that this cursor's referent is a reference to another type, or is + /// a declaration, get the cursor pointing to the referenced type or type of + /// the declared thing. + pub(crate) fn definition(&self) -> Option<Cursor> { + unsafe { + let ret = Cursor { + x: clang_getCursorDefinition(self.x), + }; + + if ret.is_valid() && ret.kind() != CXCursor_NoDeclFound { + Some(ret) + } else { + None + } + } + } + + /// Given that this cursor's referent is a reference type, get the cursor + /// pointing to the referenced type. + pub(crate) fn referenced(&self) -> Option<Cursor> { + unsafe { + let ret = Cursor { + x: clang_getCursorReferenced(self.x), + }; + + if ret.is_valid() { + Some(ret) + } else { + None + } + } + } + + /// Get the canonical cursor for this referent. + /// + /// Many types can be declared multiple times before finally being properly + /// defined. This method allows us to get the canonical cursor for the + /// referent type. + pub(crate) fn canonical(&self) -> Cursor { + unsafe { + Cursor { + x: clang_getCanonicalCursor(self.x), + } + } + } + + /// Given that this cursor points to either a template specialization or a + /// template instantiation, get a cursor pointing to the template definition + /// that is being specialized. + pub(crate) fn specialized(&self) -> Option<Cursor> { + unsafe { + let ret = Cursor { + x: clang_getSpecializedCursorTemplate(self.x), + }; + if ret.is_valid() { + Some(ret) + } else { + None + } + } + } + + /// Assuming that this cursor's referent is a template declaration, get the + /// kind of cursor that would be generated for its specializations. + pub(crate) fn template_kind(&self) -> CXCursorKind { + unsafe { clang_getTemplateCursorKind(self.x) } + } + + /// Traverse this cursor's referent and its children. + /// + /// Call the given function on each AST node traversed.
+ pub(crate) fn visit<Visitor>(&self, mut visitor: Visitor) + where + Visitor: FnMut(Cursor) -> CXChildVisitResult, + { + let data = &mut visitor as *mut Visitor; + unsafe { + clang_visitChildren(self.x, visit_children::<Visitor>, data.cast()); + } + } + + /// Collect all of this cursor's children into a vec and return them. + pub(crate) fn collect_children(&self) -> Vec<Cursor> { + let mut children = vec![]; + self.visit(|c| { + children.push(c); + CXChildVisit_Continue + }); + children + } + + /// Does this cursor have any children? + pub(crate) fn has_children(&self) -> bool { + let mut has_children = false; + self.visit(|_| { + has_children = true; + CXChildVisit_Break + }); + has_children + } + + /// Does this cursor have at least `n` children? + pub(crate) fn has_at_least_num_children(&self, n: usize) -> bool { + assert!(n > 0); + let mut num_left = n; + self.visit(|_| { + num_left -= 1; + if num_left == 0 { + CXChildVisit_Break + } else { + CXChildVisit_Continue + } + }); + num_left == 0 + } + + /// Returns whether the given location contains a cursor with the given + /// kind in the first level of nesting underneath (doesn't look + /// recursively). + pub(crate) fn contains_cursor(&self, kind: CXCursorKind) -> bool { + let mut found = false; + + self.visit(|c| { + if c.kind() == kind { + found = true; + CXChildVisit_Break + } else { + CXChildVisit_Continue + } + }); + + found + } + + /// Is the referent an inlined function? + pub(crate) fn is_inlined_function(&self) -> bool { + unsafe { clang_Cursor_isFunctionInlined(self.x) != 0 } + } + + /// Is the referent a defaulted function? + pub(crate) fn is_defaulted_function(&self) -> bool { + unsafe { clang_CXXMethod_isDefaulted(self.x) != 0 } + } + + /// Is the referent a deleted function? + pub(crate) fn is_deleted_function(&self) -> bool { + // Unfortunately, libclang doesn't yet have an API for checking if a + // member function is deleted, but the following should be a good + // enough approximation. + // Deleted functions are implicitly inline according to paragraph 4 of + // [dcl.fct.def.delete] in the C++ standard. Normal inline functions + // have a definition in the same translation unit, so if this is an + // inline function without a definition, and it's not a defaulted + // function, we can reasonably safely conclude that it's a deleted + // function. + self.is_inlined_function() && + self.definition().is_none() && + !self.is_defaulted_function() + } + + /// Is the referent a bit field declaration? + pub(crate) fn is_bit_field(&self) -> bool { + unsafe { clang_Cursor_isBitField(self.x) != 0 } + } + + /// Get a cursor to the bit field's width expression, or `None` if it's not + /// a bit field. + pub(crate) fn bit_width_expr(&self) -> Option<Cursor> { + if !self.is_bit_field() { + return None; + } + + let mut result = None; + self.visit(|cur| { + // The first child may or may not be a TypeRef, depending on whether + // the field's type is builtin. Skip it. + if cur.kind() == CXCursor_TypeRef { + return CXChildVisit_Continue; + } + + // The next expression or literal is the bit width. + result = Some(cur); + + CXChildVisit_Break + }); + + result + } + + /// Get the width of this cursor's referent bit field, or `None` if the + /// referent is not a bit field or if the width could not be evaluated. + pub(crate) fn bit_width(&self) -> Option<u32> { + // It is not safe to check the bit width without ensuring it doesn't + // depend on a template parameter.
See + // https://github.com/rust-lang/rust-bindgen/issues/2239 + if self.bit_width_expr()?.is_dependent_on_template_parameter() { + return None; + } + + unsafe { + let w = clang_getFieldDeclBitWidth(self.x); + if w == -1 { + None + } else { + Some(w as u32) + } + } + } + + /// Get the integer representation type used to hold this cursor's referent + /// enum type. + pub(crate) fn enum_type(&self) -> Option<Type> { + unsafe { + let t = Type { + x: clang_getEnumDeclIntegerType(self.x), + }; + if t.is_valid() { + Some(t) + } else { + None + } + } + } + + /// Get the boolean constant value for this cursor's enum variant referent. + /// + /// Returns None if the cursor's referent is not an enum variant. + pub(crate) fn enum_val_boolean(&self) -> Option<bool> { + unsafe { + if self.kind() == CXCursor_EnumConstantDecl { + Some(clang_getEnumConstantDeclValue(self.x) != 0) + } else { + None + } + } + } + + /// Get the signed constant value for this cursor's enum variant referent. + /// + /// Returns None if the cursor's referent is not an enum variant. + pub(crate) fn enum_val_signed(&self) -> Option<i64> { + unsafe { + if self.kind() == CXCursor_EnumConstantDecl { + #[allow(clippy::unnecessary_cast)] + Some(clang_getEnumConstantDeclValue(self.x) as i64) + } else { + None + } + } + } + + /// Get the unsigned constant value for this cursor's enum variant referent. + /// + /// Returns None if the cursor's referent is not an enum variant. + pub(crate) fn enum_val_unsigned(&self) -> Option<u64> { + unsafe { + if self.kind() == CXCursor_EnumConstantDecl { + #[allow(clippy::unnecessary_cast)] + Some(clang_getEnumConstantDeclUnsignedValue(self.x) as u64) + } else { + None + } + } + } + + /// Does this cursor have the given attributes? + pub(crate) fn has_attrs<const N: usize>( + &self, + attrs: &[Attribute; N], + ) -> [bool; N] { + let mut found_attrs = [false; N]; + let mut found_count = 0; + + self.visit(|cur| { + let kind = cur.kind(); + for (idx, attr) in attrs.iter().enumerate() { + let found_attr = &mut found_attrs[idx]; + if !*found_attr { + // `attr.name` and `attr.token_kind` are checked against unexposed attributes only. + if attr.kind.map_or(false, |k| k == kind) || + (kind == CXCursor_UnexposedAttr && + cur.tokens().iter().any(|t| { + t.kind == attr.token_kind && + t.spelling() == attr.name + })) + { + *found_attr = true; + found_count += 1; + + if found_count == N { + return CXChildVisit_Break; + } + } + } + } + + CXChildVisit_Continue + }); + + found_attrs + } + + /// Given that this cursor's referent is a `typedef`, get the `Type` that is + /// being aliased. + pub(crate) fn typedef_type(&self) -> Option<Type> { + let inner = Type { + x: unsafe { clang_getTypedefDeclUnderlyingType(self.x) }, + }; + + if inner.is_valid() { + Some(inner) + } else { + None + } + } + + /// Get the linkage kind for this cursor's referent. + /// + /// This only applies to functions and variables. + pub(crate) fn linkage(&self) -> CXLinkageKind { + unsafe { clang_getCursorLinkage(self.x) } + } + + /// Get the visibility of this cursor's referent. + pub(crate) fn visibility(&self) -> CXVisibilityKind { + unsafe { clang_getCursorVisibility(self.x) } + } + + /// Given that this cursor's referent is a function, return cursors to its + /// parameters. + /// + /// Returns None if the cursor's referent is not a function/method call or + /// declaration.
+ pub(crate) fn args(&self) -> Option<Vec<Cursor>> { + // match self.kind() { + // CXCursor_FunctionDecl | + // CXCursor_CXXMethod => { + self.num_args().ok().map(|num| { + (0..num) + .map(|i| Cursor { + x: unsafe { clang_Cursor_getArgument(self.x, i as c_uint) }, + }) + .collect() + }) + } + + /// Given that this cursor's referent is a function/method call or + /// declaration, return the number of arguments it takes. + /// + /// Returns Err if the cursor's referent is not a function/method call or + /// declaration. + pub(crate) fn num_args(&self) -> Result<u32, ()> { + unsafe { + let w = clang_Cursor_getNumArguments(self.x); + if w == -1 { + Err(()) + } else { + Ok(w as u32) + } + } + } + + /// Get the access specifier for this cursor's referent. + pub(crate) fn access_specifier(&self) -> CX_CXXAccessSpecifier { + unsafe { clang_getCXXAccessSpecifier(self.x) } + } + + /// Is the cursor's referent publicly accessible in C++? + /// + /// Returns true if self.access_specifier() is `CX_CXXPublic` or + /// `CX_CXXInvalidAccessSpecifier`. + pub(crate) fn public_accessible(&self) -> bool { + let access = self.access_specifier(); + access == CX_CXXPublic || access == CX_CXXInvalidAccessSpecifier + } + + /// Is this cursor's referent a field declaration that is marked as + /// `mutable`? + pub(crate) fn is_mutable_field(&self) -> bool { + unsafe { clang_CXXField_isMutable(self.x) != 0 } + } + + /// Get the offset of the field represented by the Cursor. + pub(crate) fn offset_of_field(&self) -> Result<usize, LayoutError> { + let offset = unsafe { clang_Cursor_getOffsetOfField(self.x) }; + + if offset < 0 { + Err(LayoutError::from(offset as i32)) + } else { + Ok(offset as usize) + } + } + + /// Is this cursor's referent a member function that is declared `static`? + pub(crate) fn method_is_static(&self) -> bool { + unsafe { clang_CXXMethod_isStatic(self.x) != 0 } + } + + /// Is this cursor's referent a member function that is declared `const`? + pub(crate) fn method_is_const(&self) -> bool { + unsafe { clang_CXXMethod_isConst(self.x) != 0 } + } + + /// Is this cursor's referent a member function that is virtual? + pub(crate) fn method_is_virtual(&self) -> bool { + unsafe { clang_CXXMethod_isVirtual(self.x) != 0 } + } + + /// Is this cursor's referent a member function that is pure virtual? + pub(crate) fn method_is_pure_virtual(&self) -> bool { + unsafe { clang_CXXMethod_isPureVirtual(self.x) != 0 } + } + + /// Is this cursor's referent a struct or class with virtual members? + pub(crate) fn is_virtual_base(&self) -> bool { + unsafe { clang_isVirtualBase(self.x) != 0 } + } + + /// Try to evaluate this cursor. + pub(crate) fn evaluate(&self) -> Option<EvalResult> { + EvalResult::new(*self) + } + + /// Return the result type for this cursor. + pub(crate) fn ret_type(&self) -> Option<Type> { + let rt = Type { + x: unsafe { clang_getCursorResultType(self.x) }, + }; + if rt.is_valid() { + Some(rt) + } else { + None + } + } + + /// Gets the tokens that correspond to that cursor. + pub(crate) fn tokens(&self) -> RawTokens { + RawTokens::new(self) + } + + /// Gets the tokens that correspond to that cursor as `cexpr` tokens. + pub(crate) fn cexpr_tokens(self) -> Vec<cexpr::token::Token> { + self.tokens() + .iter() + .filter_map(|token| token.as_cexpr_token()) + .collect() + } + + /// Obtain the real path name of a cursor of InclusionDirective kind.
+ /// + /// Returns None if the cursor does not include a file, otherwise the file's full name + pub(crate) fn get_included_file_name(&self) -> Option<String> { + let file = unsafe { clang_sys::clang_getIncludedFile(self.x) }; + if file.is_null() { + None + } else { + Some(unsafe { + cxstring_into_string(clang_sys::clang_getFileName(file)) + }) + } + } +} + +/// A struct that owns the tokenizer result from a given cursor. +pub(crate) struct RawTokens<'a> { + cursor: &'a Cursor, + tu: CXTranslationUnit, + tokens: *mut CXToken, + token_count: c_uint, +} + +impl<'a> RawTokens<'a> { + fn new(cursor: &'a Cursor) -> Self { + let mut tokens = ptr::null_mut(); + let mut token_count = 0; + let range = cursor.extent(); + let tu = unsafe { clang_Cursor_getTranslationUnit(cursor.x) }; + unsafe { clang_tokenize(tu, range, &mut tokens, &mut token_count) }; + Self { + cursor, + tu, + tokens, + token_count, + } + } + + fn as_slice(&self) -> &[CXToken] { + if self.tokens.is_null() { + return &[]; + } + unsafe { slice::from_raw_parts(self.tokens, self.token_count as usize) } + } + + /// Get an iterator over these tokens. + pub(crate) fn iter(&self) -> ClangTokenIterator { + ClangTokenIterator { + tu: self.tu, + raw: self.as_slice().iter(), + } + } +} + +impl<'a> Drop for RawTokens<'a> { + fn drop(&mut self) { + if !self.tokens.is_null() { + unsafe { + clang_disposeTokens( + self.tu, + self.tokens, + self.token_count as c_uint, + ); + } + } + } +} + +/// A raw clang token, that exposes only kind, spelling, and extent. This is a +/// slightly more convenient version of `CXToken` which owns the spelling +/// string and extent. +#[derive(Debug)] +pub(crate) struct ClangToken { + spelling: CXString, + /// The extent of the token. This is the same as the relevant member from + /// `CXToken`. + pub(crate) extent: CXSourceRange, + /// The kind of the token. This is the same as the relevant member from + /// `CXToken`. + pub(crate) kind: CXTokenKind, +} + +impl ClangToken { + /// Get the token spelling, without being converted to utf-8. + pub(crate) fn spelling(&self) -> &[u8] { + let c_str = unsafe { + CStr::from_ptr(clang_getCString(self.spelling) as *const _) + }; + c_str.to_bytes() + } + + /// Converts a ClangToken to a `cexpr` token if possible. + pub(crate) fn as_cexpr_token(&self) -> Option<cexpr::token::Token> { + use cexpr::token; + + let kind = match self.kind { + CXToken_Punctuation => token::Kind::Punctuation, + CXToken_Literal => token::Kind::Literal, + CXToken_Identifier => token::Kind::Identifier, + CXToken_Keyword => token::Kind::Keyword, + // NB: cexpr is not too happy about comments inside + // expressions, so we strip them down here. + CXToken_Comment => return None, + _ => { + warn!("Found unexpected token kind: {:?}", self); + return None; + } + }; + + Some(token::Token { + kind, + raw: self.spelling().to_vec().into_boxed_slice(), + }) + } +} + +impl Drop for ClangToken { + fn drop(&mut self) { + unsafe { clang_disposeString(self.spelling) } + } +} + +/// An iterator over a set of Tokens. +pub(crate) struct ClangTokenIterator<'a> { + tu: CXTranslationUnit, + raw: slice::Iter<'a, CXToken>, +} + +impl<'a> Iterator for ClangTokenIterator<'a> { + type Item = ClangToken; + + fn next(&mut self) -> Option<Self::Item> { + let raw = self.raw.next()?; + unsafe { + let kind = clang_getTokenKind(*raw); + let spelling = clang_getTokenSpelling(self.tu, *raw); + let extent = clang_getTokenExtent(self.tu, *raw); + Some(ClangToken { + kind, + extent, + spelling, + }) + } + } +} + +/// Checks whether the name looks like an identifier, i.e.
is alphanumeric +/// (including '_') and does not start with a digit. +pub(crate) fn is_valid_identifier(name: &str) -> bool { + let mut chars = name.chars(); + let first_valid = chars + .next() + .map(|c| c.is_alphabetic() || c == '_') + .unwrap_or(false); + + first_valid && chars.all(|c| c.is_alphanumeric() || c == '_') +} + +extern "C" fn visit_children<Visitor>( + cur: CXCursor, + _parent: CXCursor, + data: CXClientData, +) -> CXChildVisitResult +where + Visitor: FnMut(Cursor) -> CXChildVisitResult, +{ + let func: &mut Visitor = unsafe { &mut *(data as *mut Visitor) }; + let child = Cursor { x: cur }; + + (*func)(child) +} + +impl PartialEq for Cursor { + fn eq(&self, other: &Cursor) -> bool { + unsafe { clang_equalCursors(self.x, other.x) == 1 } + } +} + +impl Eq for Cursor {} + +impl Hash for Cursor { + fn hash<H: Hasher>(&self, state: &mut H) { + unsafe { clang_hashCursor(self.x) }.hash(state) + } +} + +/// The type of a node in clang's AST. +#[derive(Clone, Copy)] +pub(crate) struct Type { + x: CXType, +} + +impl PartialEq for Type { + fn eq(&self, other: &Self) -> bool { + unsafe { clang_equalTypes(self.x, other.x) != 0 } + } +} + +impl Eq for Type {} + +impl fmt::Debug for Type { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!( + fmt, + "Type({}, kind: {}, cconv: {}, decl: {:?}, canon: {:?})", + self.spelling(), + type_to_str(self.kind()), + self.call_conv(), + self.declaration(), + self.declaration().canonical() + ) + } +} + +/// An error about the layout of a struct, class, or type. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] +pub(crate) enum LayoutError { + /// Asked for the layout of an invalid type. + Invalid, + /// Asked for the layout of an incomplete type. + Incomplete, + /// Asked for the layout of a dependent type. + Dependent, + /// Asked for the layout of a type that does not have constant size. + NotConstantSize, + /// Asked for the layout of a field in a type that does not have such a + /// field. + InvalidFieldName, + /// An unknown layout error. + Unknown, +} + +impl ::std::convert::From<i32> for LayoutError { + fn from(val: i32) -> Self { + use self::LayoutError::*; + + match val { + CXTypeLayoutError_Invalid => Invalid, + CXTypeLayoutError_Incomplete => Incomplete, + CXTypeLayoutError_Dependent => Dependent, + CXTypeLayoutError_NotConstantSize => NotConstantSize, + CXTypeLayoutError_InvalidFieldName => InvalidFieldName, + _ => Unknown, + } + } +} + +impl Type { + /// Get this type's kind. + pub(crate) fn kind(&self) -> CXTypeKind { + self.x.kind + } + + /// Get a cursor pointing to this type's declaration. + pub(crate) fn declaration(&self) -> Cursor { + unsafe { + Cursor { + x: clang_getTypeDeclaration(self.x), + } + } + } + + /// Get the canonical declaration of this type, if it is available. + pub(crate) fn canonical_declaration( + &self, + location: Option<&Cursor>, + ) -> Option<CanonicalTypeDeclaration> { + let mut declaration = self.declaration(); + if !declaration.is_valid() { + if let Some(location) = location { + let mut location = *location; + if let Some(referenced) = location.referenced() { + location = referenced; + } + if location.is_template_like() { + declaration = location; + } + } + } + + let canonical = declaration.canonical(); + if canonical.is_valid() && canonical.kind() != CXCursor_NoDeclFound { + Some(CanonicalTypeDeclaration(*self, canonical)) + } else { + None + } + } + + /// Get a raw display name for this type.
+ pub(crate) fn spelling(&self) -> String { + let s = unsafe { cxstring_into_string(clang_getTypeSpelling(self.x)) }; + // Clang 5.0 introduced changes in the spelling API so it returned the + // fully qualified name. Let's undo that here. + if s.split("::").all(is_valid_identifier) { + if let Some(s) = s.split("::").last() { + return s.to_owned(); + } + } + + s + } + + /// Is this type const qualified? + pub(crate) fn is_const(&self) -> bool { + unsafe { clang_isConstQualifiedType(self.x) != 0 } + } + + #[inline] + fn is_non_deductible_auto_type(&self) -> bool { + debug_assert_eq!(self.kind(), CXType_Auto); + self.canonical_type() == *self + } + + #[inline] + fn clang_size_of(&self, ctx: &BindgenContext) -> c_longlong { + match self.kind() { + // Work-around https://bugs.llvm.org/show_bug.cgi?id=40975 + CXType_RValueReference | CXType_LValueReference => { + ctx.target_pointer_size() as c_longlong + } + // Work-around https://bugs.llvm.org/show_bug.cgi?id=40813 + CXType_Auto if self.is_non_deductible_auto_type() => -6, + _ => unsafe { clang_Type_getSizeOf(self.x) }, + } + } + + #[inline] + fn clang_align_of(&self, ctx: &BindgenContext) -> c_longlong { + match self.kind() { + // Work-around https://bugs.llvm.org/show_bug.cgi?id=40975 + CXType_RValueReference | CXType_LValueReference => { + ctx.target_pointer_size() as c_longlong + } + // Work-around https://bugs.llvm.org/show_bug.cgi?id=40813 + CXType_Auto if self.is_non_deductible_auto_type() => -6, + _ => unsafe { clang_Type_getAlignOf(self.x) }, + } + } + + /// What is the size of this type? Paper over invalid types by returning `0` + /// for them. + pub(crate) fn size(&self, ctx: &BindgenContext) -> usize { + let val = self.clang_size_of(ctx); + if val < 0 { + 0 + } else { + val as usize + } + } + + /// What is the size of this type? + pub(crate) fn fallible_size( + &self, + ctx: &BindgenContext, + ) -> Result<usize, LayoutError> { + let val = self.clang_size_of(ctx); + if val < 0 { + Err(LayoutError::from(val as i32)) + } else { + Ok(val as usize) + } + } + + /// What is the alignment of this type? Paper over invalid types by + /// returning `0`. + pub(crate) fn align(&self, ctx: &BindgenContext) -> usize { + let val = self.clang_align_of(ctx); + if val < 0 { + 0 + } else { + val as usize + } + } + + /// What is the alignment of this type? + pub(crate) fn fallible_align( + &self, + ctx: &BindgenContext, + ) -> Result<usize, LayoutError> { + let val = self.clang_align_of(ctx); + if val < 0 { + Err(LayoutError::from(val as i32)) + } else { + Ok(val as usize) + } + } + + /// Get the layout for this type, or an error describing why it does not + /// have a valid layout. + pub(crate) fn fallible_layout( + &self, + ctx: &BindgenContext, + ) -> Result<crate::ir::layout::Layout, LayoutError> { + use crate::ir::layout::Layout; + let size = self.fallible_size(ctx)?; + let align = self.fallible_align(ctx)?; + Ok(Layout::new(size, align)) + } + + /// Get the number of template arguments this type has, or `None` if it is + /// not some kind of template. + pub(crate) fn num_template_args(&self) -> Option<u32> { + let n = unsafe { clang_Type_getNumTemplateArguments(self.x) }; + if n >= 0 { + Some(n as u32) + } else { + debug_assert_eq!(n, -1); + None + } + } + + /// If this type is a class template specialization, return its + /// template arguments. Otherwise, return None. + pub(crate) fn template_args(&self) -> Option<TypeTemplateArgIterator> { + self.num_template_args().map(|n| TypeTemplateArgIterator { + x: self.x, + length: n, + index: 0, + }) + } + + /// Given that this type is a function prototype, return the types of its parameters.
+
+    /// Given that this type is a function prototype, return the types of its parameters.
+    ///
+    /// Returns None if the type is not a function prototype.
+    pub(crate) fn args(&self) -> Option<Vec<Type>> {
+        self.num_args().ok().map(|num| {
+            (0..num)
+                .map(|i| Type {
+                    x: unsafe { clang_getArgType(self.x, i as c_uint) },
+                })
+                .collect()
+        })
+    }
+
+    /// Given that this type is a function prototype, return the number of arguments it takes.
+    ///
+    /// Returns Err if the type is not a function prototype.
+    pub(crate) fn num_args(&self) -> Result<u32, ()> {
+        unsafe {
+            let w = clang_getNumArgTypes(self.x);
+            if w == -1 {
+                Err(())
+            } else {
+                Ok(w as u32)
+            }
+        }
+    }
+
+    /// Given that this type is a pointer type, return the type that it points
+    /// to.
+    pub(crate) fn pointee_type(&self) -> Option<Type> {
+        match self.kind() {
+            CXType_Pointer |
+            CXType_RValueReference |
+            CXType_LValueReference |
+            CXType_MemberPointer |
+            CXType_BlockPointer |
+            CXType_ObjCObjectPointer => {
+                let ret = Type {
+                    x: unsafe { clang_getPointeeType(self.x) },
+                };
+                debug_assert!(ret.is_valid());
+                Some(ret)
+            }
+            _ => None,
+        }
+    }
+
+    /// Given that this type is an array, vector, or complex type, return the
+    /// type of its elements.
+    pub(crate) fn elem_type(&self) -> Option<Type> {
+        let current_type = Type {
+            x: unsafe { clang_getElementType(self.x) },
+        };
+        if current_type.is_valid() {
+            Some(current_type)
+        } else {
+            None
+        }
+    }
+
+    /// Given that this type is an array or vector type, return its number of
+    /// elements.
+    pub(crate) fn num_elements(&self) -> Option<usize> {
+        let num_elements_returned = unsafe { clang_getNumElements(self.x) };
+        if num_elements_returned != -1 {
+            Some(num_elements_returned as usize)
+        } else {
+            None
+        }
+    }
+
+    /// Get the canonical version of this type. This sees through `typedef`s and
+    /// aliases to get the underlying, canonical type.
+    pub(crate) fn canonical_type(&self) -> Type {
+        unsafe {
+            Type {
+                x: clang_getCanonicalType(self.x),
+            }
+        }
+    }
+
+    /// Is this type a variadic function type?
+    pub(crate) fn is_variadic(&self) -> bool {
+        unsafe { clang_isFunctionTypeVariadic(self.x) != 0 }
+    }
+
+    /// Given that this type is a function type, get the type of its return
+    /// value.
+    pub(crate) fn ret_type(&self) -> Option<Type> {
+        let rt = Type {
+            x: unsafe { clang_getResultType(self.x) },
+        };
+        if rt.is_valid() {
+            Some(rt)
+        } else {
+            None
+        }
+    }
+
+    /// Given that this type is a function type, get its calling convention. If
+    /// this is not a function type, `CXCallingConv_Invalid` is returned.
+    pub(crate) fn call_conv(&self) -> CXCallingConv {
+        unsafe { clang_getFunctionTypeCallingConv(self.x) }
+    }
+
+    /// For elaborated types (types which use `class`, `struct`, or `union` to
+    /// disambiguate types from local bindings), get the underlying type.
+    pub(crate) fn named(&self) -> Type {
+        unsafe {
+            Type {
+                x: clang_Type_getNamedType(self.x),
+            }
+        }
+    }
+
+    /// Is this a valid type?
+    pub(crate) fn is_valid(&self) -> bool {
+        self.kind() != CXType_Invalid
+    }
+
+    /// Is this a valid and exposed type?
+    pub(crate) fn is_valid_and_exposed(&self) -> bool {
+        self.is_valid() && self.kind() != CXType_Unexposed
+    }
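`num_args` and `args` illustrate a recurring idiom in this file: libclang signals "not applicable" with a `-1` count, which becomes `Err(())`, and an indexed getter is then mapped over `0..n`. A self-contained sketch of the idiom; the `fake_*` functions are stand-ins for the `clang_getNumArgTypes`/`clang_getArgType` calls, not real bindings:

```rust
// Count query that signals "not a function prototype" with -1,
// plus a per-index getter, combined into an Option<Vec<_>>.
fn fake_num_args(is_fn_proto: bool) -> i32 {
    if is_fn_proto { 2 } else { -1 }
}

fn fake_arg_type(i: u32) -> String {
    format!("arg{}", i)
}

fn num_args(is_fn_proto: bool) -> Result<u32, ()> {
    let n = fake_num_args(is_fn_proto);
    if n == -1 { Err(()) } else { Ok(n as u32) }
}

fn args(is_fn_proto: bool) -> Option<Vec<String>> {
    num_args(is_fn_proto)
        .ok()
        .map(|n| (0..n).map(fake_arg_type).collect())
}

fn main() {
    assert_eq!(args(true), Some(vec!["arg0".to_string(), "arg1".to_string()]));
    assert_eq!(args(false), None);
}
```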
+
+    /// Is this type a fully instantiated template?
+    pub(crate) fn is_fully_instantiated_template(&self) -> bool {
+        // Yep, the spelling of this containing type-parameter is extremely
+        // nasty... But can happen in <type_traits>. Unfortunately I couldn't
+        // reduce it enough :(
+        self.template_args().map_or(false, |args| args.len() > 0) &&
+            !matches!(
+                self.declaration().kind(),
+                CXCursor_ClassTemplatePartialSpecialization |
+                    CXCursor_TypeAliasTemplateDecl |
+                    CXCursor_TemplateTemplateParameter
+            )
+    }
+
+    /// Is this type an associated template type? Eg `T::Associated` in
+    /// this example:
+    ///
+    /// ```c++
+    /// template <typename T>
+    /// class Foo {
+    ///     typename T::Associated member;
+    /// };
+    /// ```
+    pub(crate) fn is_associated_type(&self) -> bool {
+        // This is terrible :(
+        fn hacky_parse_associated_type<S: AsRef<str>>(spelling: S) -> bool {
+            lazy_static! {
+                static ref ASSOC_TYPE_RE: regex::Regex = regex::Regex::new(
+                    r"typename type\-parameter\-\d+\-\d+::.+"
+                )
+                .unwrap();
+            }
+            ASSOC_TYPE_RE.is_match(spelling.as_ref())
+        }
+
+        self.kind() == CXType_Unexposed &&
+            (hacky_parse_associated_type(self.spelling()) ||
+                hacky_parse_associated_type(
+                    self.canonical_type().spelling(),
+                ))
+    }
+}
+
+/// The `CanonicalTypeDeclaration` type exists as proof-by-construction that its
+/// cursor is the canonical declaration for its type. If you have a
+/// `CanonicalTypeDeclaration` instance, you know for sure that the type and
+/// cursor match up in a canonical declaration relationship, and it simply
+/// cannot be otherwise.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub(crate) struct CanonicalTypeDeclaration(Type, Cursor);
+
+impl CanonicalTypeDeclaration {
+    /// Get the type.
+    pub(crate) fn ty(&self) -> &Type {
+        &self.0
+    }
+
+    /// Get the type's canonical declaration cursor.
+    pub(crate) fn cursor(&self) -> &Cursor {
+        &self.1
+    }
+}
+
+/// An iterator for a type's template arguments.
+pub(crate) struct TypeTemplateArgIterator {
+    x: CXType,
+    length: u32,
+    index: u32,
+}
+
+impl Iterator for TypeTemplateArgIterator {
+    type Item = Type;
+    fn next(&mut self) -> Option<Type> {
+        if self.index < self.length {
+            let idx = self.index as c_uint;
+            self.index += 1;
+            Some(Type {
+                x: unsafe { clang_Type_getTemplateArgumentAsType(self.x, idx) },
+            })
+        } else {
+            None
+        }
+    }
+}
+
+impl ExactSizeIterator for TypeTemplateArgIterator {
+    fn len(&self) -> usize {
+        assert!(self.index <= self.length);
+        (self.length - self.index) as usize
+    }
+}
+
+/// A `SourceLocation` is a file, line, column, and byte offset location for
+/// some source text.
+pub(crate) struct SourceLocation {
+    x: CXSourceLocation,
+}
+
+impl SourceLocation {
+    /// Get the (file, line, column, byte offset) tuple for this source
+    /// location.
+    pub(crate) fn location(&self) -> (File, usize, usize, usize) {
+        unsafe {
+            let mut file = mem::zeroed();
+            let mut line = 0;
+            let mut col = 0;
+            let mut off = 0;
+            clang_getSpellingLocation(
+                self.x, &mut file, &mut line, &mut col, &mut off,
+            );
+            (File { x: file }, line as usize, col as usize, off as usize)
+        }
+    }
+}
+
+impl fmt::Display for SourceLocation {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let (file, line, col, _) = self.location();
+        if let Some(name) = file.name() {
+            write!(f, "{}:{}:{}", name, line, col)
+        } else {
+            "builtin definitions".fmt(f)
+        }
+    }
+}
+
+impl fmt::Debug for SourceLocation {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{}", self)
+    }
+}
+
+/// A comment in the source text.
+///
+/// Comments are sort of parsed by Clang, and have a tree structure.
+pub(crate) struct Comment {
+    x: CXComment,
+}
+
+impl Comment {
+    /// What kind of comment is this?
+ pub(crate) fn kind(&self) -> CXCommentKind { + unsafe { clang_Comment_getKind(self.x) } + } + + /// Get this comment's children comment + pub(crate) fn get_children(&self) -> CommentChildrenIterator { + CommentChildrenIterator { + parent: self.x, + length: unsafe { clang_Comment_getNumChildren(self.x) }, + index: 0, + } + } + + /// Given that this comment is the start or end of an HTML tag, get its tag + /// name. + pub(crate) fn get_tag_name(&self) -> String { + unsafe { cxstring_into_string(clang_HTMLTagComment_getTagName(self.x)) } + } + + /// Given that this comment is an HTML start tag, get its attributes. + pub(crate) fn get_tag_attrs(&self) -> CommentAttributesIterator { + CommentAttributesIterator { + x: self.x, + length: unsafe { clang_HTMLStartTag_getNumAttrs(self.x) }, + index: 0, + } + } +} + +/// An iterator for a comment's children +pub(crate) struct CommentChildrenIterator { + parent: CXComment, + length: c_uint, + index: c_uint, +} + +impl Iterator for CommentChildrenIterator { + type Item = Comment; + fn next(&mut self) -> Option { + if self.index < self.length { + let idx = self.index; + self.index += 1; + Some(Comment { + x: unsafe { clang_Comment_getChild(self.parent, idx) }, + }) + } else { + None + } + } +} + +/// An HTML start tag comment attribute +pub(crate) struct CommentAttribute { + /// HTML start tag attribute name + pub(crate) name: String, + /// HTML start tag attribute value + pub(crate) value: String, +} + +/// An iterator for a comment's attributes +pub(crate) struct CommentAttributesIterator { + x: CXComment, + length: c_uint, + index: c_uint, +} + +impl Iterator for CommentAttributesIterator { + type Item = CommentAttribute; + fn next(&mut self) -> Option { + if self.index < self.length { + let idx = self.index; + self.index += 1; + Some(CommentAttribute { + name: unsafe { + cxstring_into_string(clang_HTMLStartTag_getAttrName( + self.x, idx, + )) + }, + value: unsafe { + cxstring_into_string(clang_HTMLStartTag_getAttrValue( + self.x, idx, + )) + }, + }) + } else { + None + } + } +} + +/// A source file. +pub(crate) struct File { + x: CXFile, +} + +impl File { + /// Get the name of this source file. + pub(crate) fn name(&self) -> Option { + if self.x.is_null() { + return None; + } + Some(unsafe { cxstring_into_string(clang_getFileName(self.x)) }) + } +} + +fn cxstring_to_string_leaky(s: CXString) -> String { + if s.data.is_null() { + return "".to_owned(); + } + let c_str = unsafe { CStr::from_ptr(clang_getCString(s) as *const _) }; + c_str.to_string_lossy().into_owned() +} + +fn cxstring_into_string(s: CXString) -> String { + let ret = cxstring_to_string_leaky(s); + unsafe { clang_disposeString(s) }; + ret +} + +/// An `Index` is an environment for a set of translation units that will +/// typically end up linked together in one final binary. +pub(crate) struct Index { + x: CXIndex, +} + +impl Index { + /// Construct a new `Index`. + /// + /// The `pch` parameter controls whether declarations in pre-compiled + /// headers are included when enumerating a translation unit's "locals". + /// + /// The `diag` parameter controls whether debugging diagnostics are enabled. 
+ pub(crate) fn new(pch: bool, diag: bool) -> Index { + unsafe { + Index { + x: clang_createIndex(pch as c_int, diag as c_int), + } + } + } +} + +impl fmt::Debug for Index { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!(fmt, "Index {{ }}") + } +} + +impl Drop for Index { + fn drop(&mut self) { + unsafe { + clang_disposeIndex(self.x); + } + } +} + +/// A translation unit (or "compilation unit"). +pub(crate) struct TranslationUnit { + x: CXTranslationUnit, +} + +impl fmt::Debug for TranslationUnit { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!(fmt, "TranslationUnit {{ }}") + } +} + +impl TranslationUnit { + /// Parse a source file into a translation unit. + pub(crate) fn parse( + ix: &Index, + file: &str, + cmd_args: &[String], + unsaved: &[UnsavedFile], + opts: CXTranslationUnit_Flags, + ) -> Option { + let fname = CString::new(file).unwrap(); + let _c_args: Vec = cmd_args + .iter() + .map(|s| CString::new(s.clone()).unwrap()) + .collect(); + let c_args: Vec<*const c_char> = + _c_args.iter().map(|s| s.as_ptr()).collect(); + let mut c_unsaved: Vec = + unsaved.iter().map(|f| f.x).collect(); + let tu = unsafe { + clang_parseTranslationUnit( + ix.x, + fname.as_ptr(), + c_args.as_ptr(), + c_args.len() as c_int, + c_unsaved.as_mut_ptr(), + c_unsaved.len() as c_uint, + opts, + ) + }; + if tu.is_null() { + None + } else { + Some(TranslationUnit { x: tu }) + } + } + + /// Get the Clang diagnostic information associated with this translation + /// unit. + pub(crate) fn diags(&self) -> Vec { + unsafe { + let num = clang_getNumDiagnostics(self.x) as usize; + let mut diags = vec![]; + for i in 0..num { + diags.push(Diagnostic { + x: clang_getDiagnostic(self.x, i as c_uint), + }); + } + diags + } + } + + /// Get a cursor pointing to the root of this translation unit's AST. + pub(crate) fn cursor(&self) -> Cursor { + unsafe { + Cursor { + x: clang_getTranslationUnitCursor(self.x), + } + } + } + + /// Is this the null translation unit? + pub(crate) fn is_null(&self) -> bool { + self.x.is_null() + } +} + +impl Drop for TranslationUnit { + fn drop(&mut self) { + unsafe { + clang_disposeTranslationUnit(self.x); + } + } +} + +/// A diagnostic message generated while parsing a translation unit. +pub(crate) struct Diagnostic { + x: CXDiagnostic, +} + +impl Diagnostic { + /// Format this diagnostic message as a string, using the given option bit + /// flags. + pub(crate) fn format(&self) -> String { + unsafe { + let opts = clang_defaultDiagnosticDisplayOptions(); + cxstring_into_string(clang_formatDiagnostic(self.x, opts)) + } + } + + /// What is the severity of this diagnostic message? + pub(crate) fn severity(&self) -> CXDiagnosticSeverity { + unsafe { clang_getDiagnosticSeverity(self.x) } + } +} + +impl Drop for Diagnostic { + /// Destroy this diagnostic message. + fn drop(&mut self) { + unsafe { + clang_disposeDiagnostic(self.x); + } + } +} + +/// A file which has not been saved to disk. +pub(crate) struct UnsavedFile { + x: CXUnsavedFile, + /// The name of the unsaved file. Kept here to avoid leaving dangling pointers in + /// `CXUnsavedFile`. + pub(crate) name: CString, + contents: CString, +} + +impl UnsavedFile { + /// Construct a new unsaved file with the given `name` and `contents`. 
+ pub(crate) fn new(name: String, contents: String) -> UnsavedFile { + let name = CString::new(name).unwrap(); + let contents = CString::new(contents).unwrap(); + let x = CXUnsavedFile { + Filename: name.as_ptr(), + Contents: contents.as_ptr(), + Length: contents.as_bytes().len() as c_ulong, + }; + UnsavedFile { x, name, contents } + } +} + +impl fmt::Debug for UnsavedFile { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!( + fmt, + "UnsavedFile(name: {:?}, contents: {:?})", + self.name, self.contents + ) + } +} + +/// Convert a cursor kind into a static string. +pub(crate) fn kind_to_str(x: CXCursorKind) -> String { + unsafe { cxstring_into_string(clang_getCursorKindSpelling(x)) } +} + +/// Convert a type kind to a static string. +pub(crate) fn type_to_str(x: CXTypeKind) -> String { + unsafe { cxstring_into_string(clang_getTypeKindSpelling(x)) } +} + +/// Dump the Clang AST to stdout for debugging purposes. +pub(crate) fn ast_dump(c: &Cursor, depth: isize) -> CXChildVisitResult { + fn print_indent>(depth: isize, s: S) { + for _ in 0..depth { + print!(" "); + } + println!("{}", s.as_ref()); + } + + fn print_cursor>(depth: isize, prefix: S, c: &Cursor) { + let prefix = prefix.as_ref(); + print_indent( + depth, + format!(" {}kind = {}", prefix, kind_to_str(c.kind())), + ); + print_indent( + depth, + format!(" {}spelling = \"{}\"", prefix, c.spelling()), + ); + print_indent(depth, format!(" {}location = {}", prefix, c.location())); + print_indent( + depth, + format!(" {}is-definition? {}", prefix, c.is_definition()), + ); + print_indent( + depth, + format!(" {}is-declaration? {}", prefix, c.is_declaration()), + ); + print_indent( + depth, + format!( + " {}is-inlined-function? {}", + prefix, + c.is_inlined_function() + ), + ); + + let templ_kind = c.template_kind(); + if templ_kind != CXCursor_NoDeclFound { + print_indent( + depth, + format!( + " {}template-kind = {}", + prefix, + kind_to_str(templ_kind) + ), + ); + } + if let Some(usr) = c.usr() { + print_indent(depth, format!(" {}usr = \"{}\"", prefix, usr)); + } + if let Ok(num) = c.num_args() { + print_indent(depth, format!(" {}number-of-args = {}", prefix, num)); + } + if let Some(num) = c.num_template_args() { + print_indent( + depth, + format!(" {}number-of-template-args = {}", prefix, num), + ); + } + + if c.is_bit_field() { + let width = match c.bit_width() { + Some(w) => w.to_string(), + None => "".to_string(), + }; + print_indent(depth, format!(" {}bit-width = {}", prefix, width)); + } + + if let Some(ty) = c.enum_type() { + print_indent( + depth, + format!(" {}enum-type = {}", prefix, type_to_str(ty.kind())), + ); + } + if let Some(val) = c.enum_val_signed() { + print_indent(depth, format!(" {}enum-val = {}", prefix, val)); + } + if let Some(ty) = c.typedef_type() { + print_indent( + depth, + format!(" {}typedef-type = {}", prefix, type_to_str(ty.kind())), + ); + } + if let Some(ty) = c.ret_type() { + print_indent( + depth, + format!(" {}ret-type = {}", prefix, type_to_str(ty.kind())), + ); + } + + if let Some(refd) = c.referenced() { + if refd != *c { + println!(); + print_cursor( + depth, + String::from(prefix) + "referenced.", + &refd, + ); + } + } + + let canonical = c.canonical(); + if canonical != *c { + println!(); + print_cursor( + depth, + String::from(prefix) + "canonical.", + &canonical, + ); + } + + if let Some(specialized) = c.specialized() { + if specialized != *c { + println!(); + print_cursor( + depth, + String::from(prefix) + "specialized.", + &specialized, + ); + } + } + + if let Some(parent) = 
c.fallible_semantic_parent() { + println!(); + print_cursor( + depth, + String::from(prefix) + "semantic-parent.", + &parent, + ); + } + } + + fn print_type>(depth: isize, prefix: S, ty: &Type) { + let prefix = prefix.as_ref(); + + let kind = ty.kind(); + print_indent(depth, format!(" {}kind = {}", prefix, type_to_str(kind))); + if kind == CXType_Invalid { + return; + } + + print_indent(depth, format!(" {}cconv = {}", prefix, ty.call_conv())); + + print_indent( + depth, + format!(" {}spelling = \"{}\"", prefix, ty.spelling()), + ); + let num_template_args = + unsafe { clang_Type_getNumTemplateArguments(ty.x) }; + if num_template_args >= 0 { + print_indent( + depth, + format!( + " {}number-of-template-args = {}", + prefix, num_template_args + ), + ); + } + if let Some(num) = ty.num_elements() { + print_indent( + depth, + format!(" {}number-of-elements = {}", prefix, num), + ); + } + print_indent( + depth, + format!(" {}is-variadic? {}", prefix, ty.is_variadic()), + ); + + let canonical = ty.canonical_type(); + if canonical != *ty { + println!(); + print_type(depth, String::from(prefix) + "canonical.", &canonical); + } + + if let Some(pointee) = ty.pointee_type() { + if pointee != *ty { + println!(); + print_type(depth, String::from(prefix) + "pointee.", &pointee); + } + } + + if let Some(elem) = ty.elem_type() { + if elem != *ty { + println!(); + print_type(depth, String::from(prefix) + "elements.", &elem); + } + } + + if let Some(ret) = ty.ret_type() { + if ret != *ty { + println!(); + print_type(depth, String::from(prefix) + "return.", &ret); + } + } + + let named = ty.named(); + if named != *ty && named.is_valid() { + println!(); + print_type(depth, String::from(prefix) + "named.", &named); + } + } + + print_indent(depth, "("); + print_cursor(depth, "", c); + + println!(); + let ty = c.cur_type(); + print_type(depth, "type.", &ty); + + let declaration = ty.declaration(); + if declaration != *c && declaration.kind() != CXCursor_NoDeclFound { + println!(); + print_cursor(depth, "type.declaration.", &declaration); + } + + // Recurse. + let mut found_children = false; + c.visit(|s| { + if !found_children { + println!(); + found_children = true; + } + ast_dump(&s, depth + 1) + }); + + print_indent(depth, ")"); + + CXChildVisit_Continue +} + +/// Try to extract the clang version to a string +pub(crate) fn extract_clang_version() -> String { + unsafe { cxstring_into_string(clang_getClangVersion()) } +} + +/// A wrapper for the result of evaluating an expression. +#[derive(Debug)] +pub(crate) struct EvalResult { + x: CXEvalResult, + ty: Type, +} + +impl EvalResult { + /// Evaluate `cursor` and return the result. + pub(crate) fn new(cursor: Cursor) -> Option { + // Work around https://bugs.llvm.org/show_bug.cgi?id=42532, see: + // * https://github.com/rust-lang/rust-bindgen/issues/283 + // * https://github.com/rust-lang/rust-bindgen/issues/1590 + { + let mut found_cant_eval = false; + cursor.visit(|c| { + if c.kind() == CXCursor_TypeRef && + c.cur_type().canonical_type().kind() == CXType_Unexposed + { + found_cant_eval = true; + return CXChildVisit_Break; + } + + CXChildVisit_Recurse + }); + + if found_cant_eval { + return None; + } + } + Some(EvalResult { + x: unsafe { clang_Cursor_Evaluate(cursor.x) }, + ty: cursor.cur_type().canonical_type(), + }) + } + + fn kind(&self) -> CXEvalResultKind { + unsafe { clang_EvalResult_getKind(self.x) } + } + + /// Try to get back the result as a double. 
+    pub(crate) fn as_double(&self) -> Option<f64> {
+        match self.kind() {
+            CXEval_Float => {
+                Some(unsafe { clang_EvalResult_getAsDouble(self.x) })
+            }
+            _ => None,
+        }
+    }
+
+    /// Try to get back the result as an integer.
+    pub(crate) fn as_int(&self) -> Option<i64> {
+        if self.kind() != CXEval_Int {
+            return None;
+        }
+
+        if unsafe { clang_EvalResult_isUnsignedInt(self.x) } != 0 {
+            let value = unsafe { clang_EvalResult_getAsUnsigned(self.x) };
+            if value > i64::max_value() as c_ulonglong {
+                return None;
+            }
+
+            return Some(value as i64);
+        }
+
+        let value = unsafe { clang_EvalResult_getAsLongLong(self.x) };
+        if value > i64::max_value() as c_longlong {
+            return None;
+        }
+        if value < i64::min_value() as c_longlong {
+            return None;
+        }
+        #[allow(clippy::unnecessary_cast)]
+        Some(value as i64)
+    }
+
+    /// Evaluates the expression as a literal string, which may or may not be
+    /// valid utf-8.
+    pub(crate) fn as_literal_string(&self) -> Option<Vec<u8>> {
+        if self.kind() != CXEval_StrLiteral {
+            return None;
+        }
+
+        let char_ty = self.ty.pointee_type().or_else(|| self.ty.elem_type())?;
+        match char_ty.kind() {
+            CXType_Char_S | CXType_SChar | CXType_Char_U | CXType_UChar => {
+                let ret = unsafe {
+                    CStr::from_ptr(clang_EvalResult_getAsStr(self.x))
+                };
+                Some(ret.to_bytes().to_vec())
+            }
+            // FIXME: Support generating these.
+            CXType_Char16 => None,
+            CXType_Char32 => None,
+            CXType_WChar => None,
+            _ => None,
+        }
+    }
+}
+
+impl Drop for EvalResult {
+    fn drop(&mut self) {
+        unsafe { clang_EvalResult_dispose(self.x) };
+    }
+}
+
+/// Target information obtained from libclang.
+#[derive(Debug)]
+pub(crate) struct TargetInfo {
+    /// The target triple.
+    pub(crate) triple: String,
+    /// The width of the pointer _in bits_.
+    pub(crate) pointer_width: usize,
+}
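`as_int` normalizes libclang's two integer representations (unsigned and signed 64-bit) into `Option<i64>`, rejecting anything that would overflow. A standalone sketch of the unsigned half of that check, with `u64` standing in for `c_ulonglong`:

```rust
// Only values representable as i64 are surfaced; larger ones become None.
fn int_from_unsigned(value: u64) -> Option<i64> {
    if value > i64::MAX as u64 {
        return None; // would overflow i64, as in the vendored check
    }
    Some(value as i64)
}

fn main() {
    assert_eq!(int_from_unsigned(42), Some(42));
    assert_eq!(int_from_unsigned(u64::MAX), None);
    // std expresses the same check directly:
    assert_eq!(i64::try_from(u64::MAX).ok(), None);
}
```

The explicit comparison mirrors the vendored style; `i64::try_from(value).ok()` would be the idiomatic std equivalent.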
+
+impl TargetInfo {
+    /// Tries to obtain target information from libclang.
+    pub(crate) fn new(tu: &TranslationUnit) -> Self {
+        let triple;
+        let pointer_width;
+        unsafe {
+            let ti = clang_getTranslationUnitTargetInfo(tu.x);
+            triple = cxstring_into_string(clang_TargetInfo_getTriple(ti));
+            pointer_width = clang_TargetInfo_getPointerWidth(ti);
+            clang_TargetInfo_dispose(ti);
+        }
+        assert!(pointer_width > 0);
+        assert_eq!(pointer_width % 8, 0);
+        TargetInfo {
+            triple,
+            pointer_width: pointer_width as usize,
+        }
+    }
+}
diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/bindgen/codegen/bitfield_unit.rs libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/bindgen/codegen/bitfield_unit.rs
--- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/bindgen/codegen/bitfield_unit.rs 1970-01-01 00:00:00.000000000 +0000
+++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/bindgen/codegen/bitfield_unit.rs 2023-08-25 21:18:50.000000000 +0000
@@ -0,0 +1,102 @@
+#[repr(C)]
+#[derive(Copy, Clone, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct __BindgenBitfieldUnit<Storage> {
+    storage: Storage,
+}
+
+impl<Storage> __BindgenBitfieldUnit<Storage> {
+    #[inline]
+    pub const fn new(storage: Storage) -> Self {
+        Self { storage }
+    }
+}
+
+impl<Storage> __BindgenBitfieldUnit<Storage>
+where
+    Storage: AsRef<[u8]> + AsMut<[u8]>,
+{
+    #[inline]
+    pub fn get_bit(&self, index: usize) -> bool {
+        debug_assert!(index / 8 < self.storage.as_ref().len());
+
+        let byte_index = index / 8;
+        let byte = self.storage.as_ref()[byte_index];
+
+        let bit_index = if cfg!(target_endian = "big") {
+            7 - (index % 8)
+        } else {
+            index % 8
+        };
+
+        let mask = 1 << bit_index;
+
+        byte & mask == mask
+    }
+
+    #[inline]
+    pub fn set_bit(&mut self, index: usize, val: bool) {
+        debug_assert!(index / 8 < self.storage.as_ref().len());
+
+        let byte_index = index / 8;
+        let byte = &mut self.storage.as_mut()[byte_index];
+
+        let bit_index = if cfg!(target_endian = "big") {
+            7 - (index % 8)
+        } else {
+            index % 8
+        };
+
+        let mask = 1 << bit_index;
+        if val {
+            *byte |= mask;
+        } else {
+            *byte &= !mask;
+        }
+    }
+
+    #[inline]
+    pub fn get(&self, bit_offset: usize, bit_width: u8) -> u64 {
+        debug_assert!(bit_width <= 64);
+        debug_assert!(bit_offset / 8 < self.storage.as_ref().len());
+        debug_assert!(
+            (bit_offset + (bit_width as usize)) / 8 <=
+                self.storage.as_ref().len()
+        );
+
+        let mut val = 0;
+
+        for i in 0..(bit_width as usize) {
+            if self.get_bit(i + bit_offset) {
+                let index = if cfg!(target_endian = "big") {
+                    bit_width as usize - 1 - i
+                } else {
+                    i
+                };
+                val |= 1 << index;
+            }
+        }
+
+        val
+    }
+
+    #[inline]
+    pub fn set(&mut self, bit_offset: usize, bit_width: u8, val: u64) {
+        debug_assert!(bit_width <= 64);
+        debug_assert!(bit_offset / 8 < self.storage.as_ref().len());
+        debug_assert!(
+            (bit_offset + (bit_width as usize)) / 8 <=
+                self.storage.as_ref().len()
+        );
+
+        for i in 0..(bit_width as usize) {
+            let mask = 1 << i;
+            let val_bit_is_set = val & mask == mask;
+            let index = if cfg!(target_endian = "big") {
+                bit_width as usize - 1 - i
+            } else {
+                i
+            };
+            self.set_bit(index + bit_offset, val_bit_is_set);
+        }
+    }
+}
diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/bindgen/codegen/bitfield_unit_tests.rs libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/bindgen/codegen/bitfield_unit_tests.rs
--- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/bindgen/codegen/bitfield_unit_tests.rs 1970-01-01 00:00:00.000000000 +0000
+++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/bindgen/codegen/bitfield_unit_tests.rs 2023-08-25 21:18:50.000000000 +0000
@@ -0,0 +1,260 @@
+//! Tests for `__BindgenBitfieldUnit`.
+//!
+//!
Note that bit-fields are allocated right to left (least to most significant +//! bits). +//! +//! From the x86 PS ABI: +//! +//! ```c +//! struct { +//! int j : 5; +//! int k : 6; +//! int m : 7; +//! }; +//! ``` +//! +//! ```ignore +//! +------------------------------------------------------------+ +//! | | | | | +//! | padding | m | k | j | +//! |31 18|17 11|10 5|4 0| +//! +------------------------------------------------------------+ +//! ``` + +use super::bitfield_unit::__BindgenBitfieldUnit; + +#[test] +fn bitfield_unit_get_bit() { + let unit = __BindgenBitfieldUnit::<[u8; 2]>::new([0b10011101, 0b00011101]); + + let mut bits = vec![]; + for i in 0..16 { + bits.push(unit.get_bit(i)); + } + + println!(); + println!("bits = {:?}", bits); + assert_eq!( + bits, + &[ + // 0b10011101 + true, false, true, true, true, false, false, true, + // 0b00011101 + true, false, true, true, true, false, false, false + ] + ); +} + +#[test] +fn bitfield_unit_set_bit() { + let mut unit = + __BindgenBitfieldUnit::<[u8; 2]>::new([0b00000000, 0b00000000]); + + for i in 0..16 { + if i % 3 == 0 { + unit.set_bit(i, true); + } + } + + for i in 0..16 { + assert_eq!(unit.get_bit(i), i % 3 == 0); + } + + let mut unit = + __BindgenBitfieldUnit::<[u8; 2]>::new([0b11111111, 0b11111111]); + + for i in 0..16 { + if i % 3 == 0 { + unit.set_bit(i, false); + } + } + + for i in 0..16 { + assert_eq!(unit.get_bit(i), i % 3 != 0); + } +} + +macro_rules! bitfield_unit_get { + ( + $( + With $storage:expr , then get($start:expr, $len:expr) is $expected:expr; + )* + ) => { + #[test] + fn bitfield_unit_get() { + $({ + let expected = $expected; + let unit = __BindgenBitfieldUnit::<_>::new($storage); + let actual = unit.get($start, $len); + + println!(); + println!("expected = {:064b}", expected); + println!("actual = {:064b}", actual); + + assert_eq!(expected, actual); + })* + } + } +} + +bitfield_unit_get! { + // Let's just exhaustively test getting the bits from a single byte, since + // there are few enough combinations... 
+ + With [0b11100010], then get(0, 1) is 0; + With [0b11100010], then get(1, 1) is 1; + With [0b11100010], then get(2, 1) is 0; + With [0b11100010], then get(3, 1) is 0; + With [0b11100010], then get(4, 1) is 0; + With [0b11100010], then get(5, 1) is 1; + With [0b11100010], then get(6, 1) is 1; + With [0b11100010], then get(7, 1) is 1; + + With [0b11100010], then get(0, 2) is 0b10; + With [0b11100010], then get(1, 2) is 0b01; + With [0b11100010], then get(2, 2) is 0b00; + With [0b11100010], then get(3, 2) is 0b00; + With [0b11100010], then get(4, 2) is 0b10; + With [0b11100010], then get(5, 2) is 0b11; + With [0b11100010], then get(6, 2) is 0b11; + + With [0b11100010], then get(0, 3) is 0b010; + With [0b11100010], then get(1, 3) is 0b001; + With [0b11100010], then get(2, 3) is 0b000; + With [0b11100010], then get(3, 3) is 0b100; + With [0b11100010], then get(4, 3) is 0b110; + With [0b11100010], then get(5, 3) is 0b111; + + With [0b11100010], then get(0, 4) is 0b0010; + With [0b11100010], then get(1, 4) is 0b0001; + With [0b11100010], then get(2, 4) is 0b1000; + With [0b11100010], then get(3, 4) is 0b1100; + With [0b11100010], then get(4, 4) is 0b1110; + + With [0b11100010], then get(0, 5) is 0b00010; + With [0b11100010], then get(1, 5) is 0b10001; + With [0b11100010], then get(2, 5) is 0b11000; + With [0b11100010], then get(3, 5) is 0b11100; + + With [0b11100010], then get(0, 6) is 0b100010; + With [0b11100010], then get(1, 6) is 0b110001; + With [0b11100010], then get(2, 6) is 0b111000; + + With [0b11100010], then get(0, 7) is 0b1100010; + With [0b11100010], then get(1, 7) is 0b1110001; + + With [0b11100010], then get(0, 8) is 0b11100010; + + // OK. Now let's test getting bits from across byte boundaries. + + With [0b01010101, 0b11111111, 0b00000000, 0b11111111], + then get(0, 16) is 0b1111111101010101; + + With [0b01010101, 0b11111111, 0b00000000, 0b11111111], + then get(1, 16) is 0b0111111110101010; + + With [0b01010101, 0b11111111, 0b00000000, 0b11111111], + then get(2, 16) is 0b0011111111010101; + + With [0b01010101, 0b11111111, 0b00000000, 0b11111111], + then get(3, 16) is 0b0001111111101010; + + With [0b01010101, 0b11111111, 0b00000000, 0b11111111], + then get(4, 16) is 0b0000111111110101; + + With [0b01010101, 0b11111111, 0b00000000, 0b11111111], + then get(5, 16) is 0b0000011111111010; + + With [0b01010101, 0b11111111, 0b00000000, 0b11111111], + then get(6, 16) is 0b0000001111111101; + + With [0b01010101, 0b11111111, 0b00000000, 0b11111111], + then get(7, 16) is 0b0000000111111110; + + With [0b01010101, 0b11111111, 0b00000000, 0b11111111], + then get(8, 16) is 0b0000000011111111; +} + +macro_rules! bitfield_unit_set { + ( + $( + set($start:expr, $len:expr, $val:expr) is $expected:expr; + )* + ) => { + #[test] + fn bitfield_unit_set() { + $( + let mut unit = __BindgenBitfieldUnit::<[u8; 4]>::new([0, 0, 0, 0]); + unit.set($start, $len, $val); + let actual = unit.get(0, 32); + + println!(); + println!("set({}, {}, {:032b}", $start, $len, $val); + println!("expected = {:064b}", $expected); + println!("actual = {:064b}", actual); + + assert_eq!($expected, actual); + )* + } + } +} + +bitfield_unit_set! { + // Once again, let's exhaustively test single byte combinations. 
+ + set(0, 1, 0b11111111) is 0b00000001; + set(1, 1, 0b11111111) is 0b00000010; + set(2, 1, 0b11111111) is 0b00000100; + set(3, 1, 0b11111111) is 0b00001000; + set(4, 1, 0b11111111) is 0b00010000; + set(5, 1, 0b11111111) is 0b00100000; + set(6, 1, 0b11111111) is 0b01000000; + set(7, 1, 0b11111111) is 0b10000000; + + set(0, 2, 0b11111111) is 0b00000011; + set(1, 2, 0b11111111) is 0b00000110; + set(2, 2, 0b11111111) is 0b00001100; + set(3, 2, 0b11111111) is 0b00011000; + set(4, 2, 0b11111111) is 0b00110000; + set(5, 2, 0b11111111) is 0b01100000; + set(6, 2, 0b11111111) is 0b11000000; + + set(0, 3, 0b11111111) is 0b00000111; + set(1, 3, 0b11111111) is 0b00001110; + set(2, 3, 0b11111111) is 0b00011100; + set(3, 3, 0b11111111) is 0b00111000; + set(4, 3, 0b11111111) is 0b01110000; + set(5, 3, 0b11111111) is 0b11100000; + + set(0, 4, 0b11111111) is 0b00001111; + set(1, 4, 0b11111111) is 0b00011110; + set(2, 4, 0b11111111) is 0b00111100; + set(3, 4, 0b11111111) is 0b01111000; + set(4, 4, 0b11111111) is 0b11110000; + + set(0, 5, 0b11111111) is 0b00011111; + set(1, 5, 0b11111111) is 0b00111110; + set(2, 5, 0b11111111) is 0b01111100; + set(3, 5, 0b11111111) is 0b11111000; + + set(0, 6, 0b11111111) is 0b00111111; + set(1, 6, 0b11111111) is 0b01111110; + set(2, 6, 0b11111111) is 0b11111100; + + set(0, 7, 0b11111111) is 0b01111111; + set(1, 7, 0b11111111) is 0b11111110; + + set(0, 8, 0b11111111) is 0b11111111; + + // And, now let's cross byte boundaries. + + set(0, 16, 0b1111111111111111) is 0b00000000000000001111111111111111; + set(1, 16, 0b1111111111111111) is 0b00000000000000011111111111111110; + set(2, 16, 0b1111111111111111) is 0b00000000000000111111111111111100; + set(3, 16, 0b1111111111111111) is 0b00000000000001111111111111111000; + set(4, 16, 0b1111111111111111) is 0b00000000000011111111111111110000; + set(5, 16, 0b1111111111111111) is 0b00000000000111111111111111100000; + set(6, 16, 0b1111111111111111) is 0b00000000001111111111111111000000; + set(7, 16, 0b1111111111111111) is 0b00000000011111111111111110000000; + set(8, 16, 0b1111111111111111) is 0b00000000111111111111111100000000; +} diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/bindgen/codegen/dyngen.rs libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/bindgen/codegen/dyngen.rs --- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/bindgen/codegen/dyngen.rs 1970-01-01 00:00:00.000000000 +0000 +++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/bindgen/codegen/dyngen.rs 2023-08-25 21:18:50.000000000 +0000 @@ -0,0 +1,201 @@ +use crate::codegen; +use crate::ir::context::BindgenContext; +use crate::ir::function::ClangAbi; +use proc_macro2::Ident; + +/// Used to build the output tokens for dynamic bindings. +#[derive(Default)] +pub(crate) struct DynamicItems { + /// Tracks the tokens that will appears inside the library struct -- e.g.: + /// ```ignore + /// struct Lib { + /// __library: ::libloading::Library, + /// pub x: Result, // <- tracks these + /// ... + /// } + /// ``` + struct_members: Vec, + + /// Tracks the tokens that will appear inside the library struct's implementation, e.g.: + /// + /// ```ignore + /// impl Lib { + /// ... + /// pub unsafe fn foo(&self, ...) { // <- tracks these + /// ... + /// } + /// } + /// ``` + struct_implementation: Vec, + + /// Tracks the initialization of the fields inside the `::new` constructor of the library + /// struct, e.g.: + /// ```ignore + /// impl Lib { + /// + /// pub unsafe fn new
<P>
(path: P) -> Result + /// where + /// P: AsRef<::std::ffi::OsStr>, + /// { + /// ... + /// let foo = __library.get(...) ...; // <- tracks these + /// ... + /// } + /// + /// ... + /// } + /// ``` + constructor_inits: Vec, + + /// Tracks the information that is passed to the library struct at the end of the `::new` + /// constructor, e.g.: + /// ```ignore + /// impl LibFoo { + /// pub unsafe fn new
<P>
(path: P) -> Result + /// where + /// P: AsRef<::std::ffi::OsStr>, + /// { + /// ... + /// Ok(LibFoo { + /// __library: __library, + /// foo, + /// bar, // <- tracks these + /// ... + /// }) + /// } + /// } + /// ``` + init_fields: Vec, +} + +impl DynamicItems { + pub(crate) fn new() -> Self { + Self::default() + } + + pub(crate) fn get_tokens( + &self, + lib_ident: Ident, + ctx: &BindgenContext, + ) -> proc_macro2::TokenStream { + let struct_members = &self.struct_members; + let constructor_inits = &self.constructor_inits; + let init_fields = &self.init_fields; + let struct_implementation = &self.struct_implementation; + + let from_library = if ctx.options().wrap_unsafe_ops { + quote!(unsafe { Self::from_library(library) }) + } else { + quote!(Self::from_library(library)) + }; + + quote! { + extern crate libloading; + + pub struct #lib_ident { + __library: ::libloading::Library, + #(#struct_members)* + } + + impl #lib_ident { + pub unsafe fn new
<P>
( + path: P + ) -> Result + where P: AsRef<::std::ffi::OsStr> { + let library = ::libloading::Library::new(path)?; + #from_library + } + + pub unsafe fn from_library( + library: L + ) -> Result + where L: Into<::libloading::Library> { + let __library = library.into(); + #( #constructor_inits )* + Ok(#lib_ident { + __library, + #( #init_fields ),* + }) + } + + #( #struct_implementation )* + } + } + } + + #[allow(clippy::too_many_arguments)] + pub(crate) fn push( + &mut self, + ident: Ident, + abi: ClangAbi, + is_variadic: bool, + is_required: bool, + args: Vec, + args_identifiers: Vec, + ret: proc_macro2::TokenStream, + ret_ty: proc_macro2::TokenStream, + attributes: Vec, + ctx: &BindgenContext, + ) { + if !is_variadic { + assert_eq!(args.len(), args_identifiers.len()); + } + + let signature = quote! { unsafe extern #abi fn ( #( #args),* ) #ret }; + let member = if is_required { + signature + } else { + quote! { Result<#signature, ::libloading::Error> } + }; + + self.struct_members.push(quote! { + pub #ident: #member, + }); + + // N.B: If the signature was required, it won't be wrapped in a Result<...> + // and we can simply call it directly. + let fn_ = if is_required { + quote! { self.#ident } + } else { + quote! { self.#ident.as_ref().expect("Expected function, got error.") } + }; + let call_body = if ctx.options().wrap_unsafe_ops { + quote!(unsafe { (#fn_)(#( #args_identifiers ),*) }) + } else { + quote!((#fn_)(#( #args_identifiers ),*) ) + }; + + // We can't implement variadic functions from C easily, so we allow to + // access the function pointer so that the user can call it just fine. + if !is_variadic { + self.struct_implementation.push(quote! { + #(#attributes)* + pub unsafe fn #ident ( &self, #( #args ),* ) #ret_ty { + #call_body + } + }); + } + + // N.B: Unwrap the signature upon construction if it is required to be resolved. + let ident_str = codegen::helpers::ast_ty::cstr_expr(ident.to_string()); + let library_get = if ctx.options().wrap_unsafe_ops { + quote!(unsafe { __library.get(#ident_str) }) + } else { + quote!(__library.get(#ident_str)) + }; + + self.constructor_inits.push(if is_required { + quote! { + let #ident = #library_get.map(|sym| *sym)?; + } + } else { + quote! { + let #ident = #library_get.map(|sym| *sym); + } + }); + + self.init_fields.push(quote! { + #ident + }); + } +} diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/bindgen/codegen/error.rs libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/bindgen/codegen/error.rs --- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/bindgen/codegen/error.rs 1970-01-01 00:00:00.000000000 +0000 +++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/bindgen/codegen/error.rs 2023-08-25 21:18:50.000000000 +0000 @@ -0,0 +1,33 @@ +use std::error; +use std::fmt; + +/// Errors that can occur during code generation. +#[derive(Clone, Debug, PartialEq, Eq)] +pub(crate) enum Error { + /// Tried to generate an opaque blob for a type that did not have a layout. + NoLayoutForOpaqueBlob, + + /// Tried to instantiate an opaque template definition, or a template + /// definition that is too difficult for us to understand (like a partial + /// template specialization). 
+ InstantiationOfOpaqueType, +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str(match *self { + Error::NoLayoutForOpaqueBlob => { + "Tried to generate an opaque blob, but had no layout" + } + Error::InstantiationOfOpaqueType => { + "Instantiation of opaque template type or partial template \ + specialization" + } + }) + } +} + +impl error::Error for Error {} + +/// A `Result` of `T` or an error of `bindgen::codegen::error::Error`. +pub(crate) type Result = ::std::result::Result; diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/bindgen/codegen/helpers.rs libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/bindgen/codegen/helpers.rs --- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/bindgen/codegen/helpers.rs 1970-01-01 00:00:00.000000000 +0000 +++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/bindgen/codegen/helpers.rs 2023-08-25 21:18:50.000000000 +0000 @@ -0,0 +1,322 @@ +//! Helpers for code generation that don't need macro expansion. + +use crate::ir::context::BindgenContext; +use crate::ir::layout::Layout; +use proc_macro2::{Ident, Span, TokenStream}; +use quote::TokenStreamExt; + +pub(crate) mod attributes { + use proc_macro2::{Ident, Span, TokenStream}; + use std::{borrow::Cow, str::FromStr}; + + pub(crate) fn repr(which: &str) -> TokenStream { + let which = Ident::new(which, Span::call_site()); + quote! { + #[repr( #which )] + } + } + + pub(crate) fn repr_list(which_ones: &[&str]) -> TokenStream { + let which_ones = which_ones + .iter() + .cloned() + .map(|one| TokenStream::from_str(one).expect("repr to be valid")); + quote! { + #[repr( #( #which_ones ),* )] + } + } + + pub(crate) fn derives(which_ones: &[&str]) -> TokenStream { + let which_ones = which_ones + .iter() + .cloned() + .map(|one| TokenStream::from_str(one).expect("derive to be valid")); + quote! { + #[derive( #( #which_ones ),* )] + } + } + + pub(crate) fn inline() -> TokenStream { + quote! { + #[inline] + } + } + + pub(crate) fn must_use() -> TokenStream { + quote! { + #[must_use] + } + } + + pub(crate) fn non_exhaustive() -> TokenStream { + quote! { + #[non_exhaustive] + } + } + + pub(crate) fn doc(comment: String) -> TokenStream { + if comment.is_empty() { + quote!() + } else { + quote!(#[doc = #comment]) + } + } + + pub(crate) fn link_name(name: &str) -> TokenStream { + // LLVM mangles the name by default but it's already mangled. + // Prefixing the name with \u{1} should tell LLVM to not mangle it. + let name: Cow<'_, str> = if MANGLE { + name.into() + } else { + format!("\u{1}{}", name).into() + }; + + quote! { + #[link_name = #name] + } + } +} + +/// Generates a proper type for a field or type with a given `Layout`, that is, +/// a type with the correct size and alignment restrictions. +pub(crate) fn blob(ctx: &BindgenContext, layout: Layout) -> TokenStream { + let opaque = layout.opaque(); + + // FIXME(emilio, #412): We fall back to byte alignment, but there are + // some things that legitimately are more than 8-byte aligned. + // + // Eventually we should be able to `unwrap` here, but... + let ty_name = match opaque.known_rust_type_for_array(ctx) { + Some(ty) => ty, + None => { + warn!("Found unknown alignment on code generation!"); + "u8" + } + }; + + let ty_name = Ident::new(ty_name, Span::call_site()); + + let data_len = opaque.array_size(ctx).unwrap_or(layout.size); + + if data_len == 1 { + quote! { + #ty_name + } + } else { + quote! { + [ #ty_name ; #data_len ] + } + } +} + +/// Integer type of the same size as the given `Layout`. 
+pub(crate) fn integer_type( + ctx: &BindgenContext, + layout: Layout, +) -> Option { + let name = Layout::known_type_for_size(ctx, layout.size)?; + let name = Ident::new(name, Span::call_site()); + Some(quote! { #name }) +} + +/// Generates a bitfield allocation unit type for a type with the given `Layout`. +pub(crate) fn bitfield_unit( + ctx: &BindgenContext, + layout: Layout, +) -> TokenStream { + let mut tokens = quote! {}; + + if ctx.options().enable_cxx_namespaces { + tokens.append_all(quote! { root:: }); + } + + let size = layout.size; + tokens.append_all(quote! { + __BindgenBitfieldUnit<[u8; #size]> + }); + + tokens +} + +pub(crate) mod ast_ty { + use crate::ir::context::BindgenContext; + use crate::ir::function::FunctionSig; + use crate::ir::layout::Layout; + use crate::ir::ty::FloatKind; + use proc_macro2::{self, TokenStream}; + use std::str::FromStr; + + pub(crate) fn c_void(ctx: &BindgenContext) -> TokenStream { + // ctypes_prefix takes precedence + match ctx.options().ctypes_prefix { + Some(ref prefix) => { + let prefix = TokenStream::from_str(prefix.as_str()).unwrap(); + quote! { + #prefix::c_void + } + } + None => { + if ctx.options().use_core && + ctx.options().rust_features.core_ffi_c_void + { + quote! { ::core::ffi::c_void } + } else { + quote! { ::std::os::raw::c_void } + } + } + } + } + + pub(crate) fn raw_type(ctx: &BindgenContext, name: &str) -> TokenStream { + let ident = ctx.rust_ident_raw(name); + match ctx.options().ctypes_prefix { + Some(ref prefix) => { + let prefix = TokenStream::from_str(prefix.as_str()).unwrap(); + quote! { + #prefix::#ident + } + } + None => { + if ctx.options().use_core && + ctx.options().rust_features().core_ffi_c + { + quote! { + ::core::ffi::#ident + } + } else { + quote! { + ::std::os::raw::#ident + } + } + } + } + } + + pub(crate) fn float_kind_rust_type( + ctx: &BindgenContext, + fk: FloatKind, + layout: Option, + ) -> TokenStream { + // TODO: we probably should take the type layout into account more + // often? + // + // Also, maybe this one shouldn't be the default? + match (fk, ctx.options().convert_floats) { + (FloatKind::Float, true) => quote! { f32 }, + (FloatKind::Double, true) => quote! { f64 }, + (FloatKind::Float, false) => raw_type(ctx, "c_float"), + (FloatKind::Double, false) => raw_type(ctx, "c_double"), + (FloatKind::LongDouble, _) => { + match layout { + Some(layout) => { + match layout.size { + 4 => quote! { f32 }, + 8 => quote! { f64 }, + // TODO(emilio): If rust ever gains f128 we should + // use it here and below. + _ => super::integer_type(ctx, layout) + .unwrap_or(quote! { f64 }), + } + } + None => { + debug_assert!( + false, + "How didn't we know the layout for a primitive type?" + ); + quote! { f64 } + } + } + } + (FloatKind::Float128, _) => { + if ctx.options().rust_features.i128_and_u128 { + quote! { u128 } + } else { + quote! { [u64; 2] } + } + } + } + } + + pub(crate) fn int_expr(val: i64) -> TokenStream { + // Don't use quote! { #val } because that adds the type suffix. + let val = proc_macro2::Literal::i64_unsuffixed(val); + quote!(#val) + } + + pub(crate) fn uint_expr(val: u64) -> TokenStream { + // Don't use quote! { #val } because that adds the type suffix. + let val = proc_macro2::Literal::u64_unsuffixed(val); + quote!(#val) + } + + pub(crate) fn byte_array_expr(bytes: &[u8]) -> TokenStream { + let mut bytes: Vec<_> = bytes.to_vec(); + bytes.push(0); + quote! 
{ [ #(#bytes),* ] } + } + + pub(crate) fn cstr_expr(mut string: String) -> TokenStream { + string.push('\0'); + let b = proc_macro2::Literal::byte_string(string.as_bytes()); + quote! { + #b + } + } + + pub(crate) fn float_expr( + ctx: &BindgenContext, + f: f64, + ) -> Result { + if f.is_finite() { + let val = proc_macro2::Literal::f64_unsuffixed(f); + + return Ok(quote!(#val)); + } + + let prefix = ctx.trait_prefix(); + + if f.is_nan() { + return Ok(quote! { + ::#prefix::f64::NAN + }); + } + + if f.is_infinite() { + return Ok(if f.is_sign_positive() { + quote! { + ::#prefix::f64::INFINITY + } + } else { + quote! { + ::#prefix::f64::NEG_INFINITY + } + }); + } + + warn!("Unknown non-finite float number: {:?}", f); + Err(()) + } + + pub(crate) fn arguments_from_signature( + signature: &FunctionSig, + ctx: &BindgenContext, + ) -> Vec { + let mut unnamed_arguments = 0; + signature + .argument_types() + .iter() + .map(|&(ref name, _ty)| match *name { + Some(ref name) => { + let name = ctx.rust_ident(name); + quote! { #name } + } + None => { + unnamed_arguments += 1; + let name = + ctx.rust_ident(format!("arg{}", unnamed_arguments)); + quote! { #name } + } + }) + .collect() + } +} diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/bindgen/codegen/impl_debug.rs libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/bindgen/codegen/impl_debug.rs --- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/bindgen/codegen/impl_debug.rs 1970-01-01 00:00:00.000000000 +0000 +++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/bindgen/codegen/impl_debug.rs 2023-08-25 21:18:50.000000000 +0000 @@ -0,0 +1,245 @@ +use crate::ir::comp::{BitfieldUnit, CompKind, Field, FieldData, FieldMethods}; +use crate::ir::context::BindgenContext; +use crate::ir::item::{HasTypeParamInArray, IsOpaque, Item, ItemCanonicalName}; +use crate::ir::ty::{TypeKind, RUST_DERIVE_IN_ARRAY_LIMIT}; + +pub(crate) fn gen_debug_impl( + ctx: &BindgenContext, + fields: &[Field], + item: &Item, + kind: CompKind, +) -> proc_macro2::TokenStream { + let struct_name = item.canonical_name(ctx); + let mut format_string = format!("{} {{{{ ", struct_name); + let mut tokens = vec![]; + + if item.is_opaque(ctx, &()) { + format_string.push_str("opaque"); + } else { + match kind { + CompKind::Union => { + format_string.push_str("union"); + } + CompKind::Struct => { + let processed_fields = fields.iter().filter_map(|f| match f { + Field::DataMember(ref fd) => fd.impl_debug(ctx, ()), + Field::Bitfields(ref bu) => bu.impl_debug(ctx, ()), + }); + + for (i, (fstring, toks)) in processed_fields.enumerate() { + if i > 0 { + format_string.push_str(", "); + } + tokens.extend(toks); + format_string.push_str(&fstring); + } + } + } + } + + format_string.push_str(" }}"); + tokens.insert(0, quote! { #format_string }); + + let prefix = ctx.trait_prefix(); + + quote! { + fn fmt(&self, f: &mut ::#prefix::fmt::Formatter<'_>) -> ::#prefix ::fmt::Result { + write!(f, #( #tokens ),*) + } + } +} + +/// A trait for the things which we can codegen tokens that contribute towards a +/// generated `impl Debug`. +pub(crate) trait ImplDebug<'a> { + /// Any extra parameter required by this a particular `ImplDebug` implementation. + type Extra; + + /// Generate a format string snippet to be included in the larger `impl Debug` + /// format string, and the code to get the format string's interpolation values. 
+ fn impl_debug( + &self, + ctx: &BindgenContext, + extra: Self::Extra, + ) -> Option<(String, Vec)>; +} + +impl<'a> ImplDebug<'a> for FieldData { + type Extra = (); + + fn impl_debug( + &self, + ctx: &BindgenContext, + _: Self::Extra, + ) -> Option<(String, Vec)> { + if let Some(name) = self.name() { + ctx.resolve_item(self.ty()).impl_debug(ctx, name) + } else { + None + } + } +} + +impl<'a> ImplDebug<'a> for BitfieldUnit { + type Extra = (); + + fn impl_debug( + &self, + ctx: &BindgenContext, + _: Self::Extra, + ) -> Option<(String, Vec)> { + let mut format_string = String::new(); + let mut tokens = vec![]; + for (i, bitfield) in self.bitfields().iter().enumerate() { + if i > 0 { + format_string.push_str(", "); + } + + if let Some(bitfield_name) = bitfield.name() { + format_string.push_str(&format!("{} : {{:?}}", bitfield_name)); + let getter_name = bitfield.getter_name(); + let name_ident = ctx.rust_ident_raw(getter_name); + tokens.push(quote! { + self.#name_ident () + }); + } + } + + Some((format_string, tokens)) + } +} + +impl<'a> ImplDebug<'a> for Item { + type Extra = &'a str; + + fn impl_debug( + &self, + ctx: &BindgenContext, + name: &str, + ) -> Option<(String, Vec)> { + let name_ident = ctx.rust_ident(name); + + // We don't know if blocklisted items `impl Debug` or not, so we can't + // add them to the format string we're building up. + if !ctx.allowlisted_items().contains(&self.id()) { + return None; + } + + let ty = match self.as_type() { + Some(ty) => ty, + None => { + return None; + } + }; + + fn debug_print( + name: &str, + name_ident: proc_macro2::TokenStream, + ) -> Option<(String, Vec)> { + Some(( + format!("{}: {{:?}}", name), + vec![quote! { + self.#name_ident + }], + )) + } + + match *ty.kind() { + // Handle the simple cases. + TypeKind::Void | + TypeKind::NullPtr | + TypeKind::Int(..) | + TypeKind::Float(..) | + TypeKind::Complex(..) | + TypeKind::Function(..) | + TypeKind::Enum(..) | + TypeKind::Reference(..) | + TypeKind::UnresolvedTypeRef(..) | + TypeKind::ObjCInterface(..) | + TypeKind::ObjCId | + TypeKind::Comp(..) | + TypeKind::ObjCSel => debug_print(name, quote! { #name_ident }), + + TypeKind::TemplateInstantiation(ref inst) => { + if inst.is_opaque(ctx, self) { + Some((format!("{}: opaque", name), vec![])) + } else { + debug_print(name, quote! { #name_ident }) + } + } + + // The generic is not required to implement Debug, so we can not debug print that type + TypeKind::TypeParam => { + Some((format!("{}: Non-debuggable generic", name), vec![])) + } + + TypeKind::Array(_, len) => { + // Generics are not required to implement Debug + if self.has_type_param_in_array(ctx) { + Some(( + format!("{}: Array with length {}", name, len), + vec![], + )) + } else if len < RUST_DERIVE_IN_ARRAY_LIMIT || + ctx.options().rust_features().larger_arrays + { + // The simple case + debug_print(name, quote! { #name_ident }) + } else if ctx.options().use_core { + // There is no String in core; reducing field visibility to avoid breaking + // no_std setups. + Some((format!("{}: [...]", name), vec![])) + } else { + // Let's implement our own print function + Some(( + format!("{}: [{{}}]", name), + vec![quote! { + self.#name_ident + .iter() + .enumerate() + .map(|(i, v)| format!("{}{:?}", if i > 0 { ", " } else { "" }, v)) + .collect::() + }], + )) + } + } + TypeKind::Vector(_, len) => { + if ctx.options().use_core { + // There is no format! in core; reducing field visibility to avoid breaking + // no_std setups. 
+ Some((format!("{}(...)", name), vec![])) + } else { + let self_ids = 0..len; + Some(( + format!("{}({{}})", name), + vec![quote! { + #(format!("{:?}", self.#self_ids)),* + }], + )) + } + } + + TypeKind::ResolvedTypeRef(t) | + TypeKind::TemplateAlias(t, _) | + TypeKind::Alias(t) | + TypeKind::BlockPointer(t) => { + // We follow the aliases + ctx.resolve_item(t).impl_debug(ctx, name) + } + + TypeKind::Pointer(inner) => { + let inner_type = ctx.resolve_type(inner).canonical_type(ctx); + match *inner_type.kind() { + TypeKind::Function(ref sig) + if !sig.function_pointers_can_derive() => + { + Some((format!("{}: FunctionPointer", name), vec![])) + } + _ => debug_print(name, quote! { #name_ident }), + } + } + + TypeKind::Opaque => None, + } + } +} diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/bindgen/codegen/impl_partialeq.rs libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/bindgen/codegen/impl_partialeq.rs --- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/bindgen/codegen/impl_partialeq.rs 1970-01-01 00:00:00.000000000 +0000 +++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/bindgen/codegen/impl_partialeq.rs 2023-08-25 21:18:50.000000000 +0000 @@ -0,0 +1,142 @@ +use crate::ir::comp::{CompInfo, CompKind, Field, FieldMethods}; +use crate::ir::context::BindgenContext; +use crate::ir::item::{IsOpaque, Item}; +use crate::ir::ty::{TypeKind, RUST_DERIVE_IN_ARRAY_LIMIT}; + +/// Generate a manual implementation of `PartialEq` trait for the +/// specified compound type. +pub(crate) fn gen_partialeq_impl( + ctx: &BindgenContext, + comp_info: &CompInfo, + item: &Item, + ty_for_impl: &proc_macro2::TokenStream, +) -> Option { + let mut tokens = vec![]; + + if item.is_opaque(ctx, &()) { + tokens.push(quote! { + &self._bindgen_opaque_blob[..] == &other._bindgen_opaque_blob[..] + }); + } else if comp_info.kind() == CompKind::Union { + assert!(!ctx.options().untagged_union); + tokens.push(quote! { + &self.bindgen_union_field[..] == &other.bindgen_union_field[..] + }); + } else { + for base in comp_info.base_members().iter() { + if !base.requires_storage(ctx) { + continue; + } + + let ty_item = ctx.resolve_item(base.ty); + let field_name = &base.field_name; + + if ty_item.is_opaque(ctx, &()) { + let field_name = ctx.rust_ident(field_name); + tokens.push(quote! { + &self. #field_name [..] == &other. #field_name [..] + }); + } else { + tokens.push(gen_field(ctx, ty_item, field_name)); + } + } + + for field in comp_info.fields() { + match *field { + Field::DataMember(ref fd) => { + let ty_item = ctx.resolve_item(fd.ty()); + let name = fd.name().unwrap(); + tokens.push(gen_field(ctx, ty_item, name)); + } + Field::Bitfields(ref bu) => { + for bitfield in bu.bitfields() { + if bitfield.name().is_some() { + let getter_name = bitfield.getter_name(); + let name_ident = ctx.rust_ident_raw(getter_name); + tokens.push(quote! { + self.#name_ident () == other.#name_ident () + }); + } + } + } + } + } + } + + Some(quote! { + fn eq(&self, other: & #ty_for_impl) -> bool { + #( #tokens )&&* + } + }) +} + +fn gen_field( + ctx: &BindgenContext, + ty_item: &Item, + name: &str, +) -> proc_macro2::TokenStream { + fn quote_equals( + name_ident: proc_macro2::Ident, + ) -> proc_macro2::TokenStream { + quote! { self.#name_ident == other.#name_ident } + } + + let name_ident = ctx.rust_ident(name); + let ty = ty_item.expect_type(); + + match *ty.kind() { + TypeKind::Void | + TypeKind::NullPtr | + TypeKind::Int(..) | + TypeKind::Complex(..) | + TypeKind::Float(..) | + TypeKind::Enum(..) 
| + TypeKind::TypeParam | + TypeKind::UnresolvedTypeRef(..) | + TypeKind::Reference(..) | + TypeKind::ObjCInterface(..) | + TypeKind::ObjCId | + TypeKind::ObjCSel | + TypeKind::Comp(..) | + TypeKind::Pointer(_) | + TypeKind::Function(..) | + TypeKind::Opaque => quote_equals(name_ident), + + TypeKind::TemplateInstantiation(ref inst) => { + if inst.is_opaque(ctx, ty_item) { + quote! { + &self. #name_ident [..] == &other. #name_ident [..] + } + } else { + quote_equals(name_ident) + } + } + + TypeKind::Array(_, len) => { + if len <= RUST_DERIVE_IN_ARRAY_LIMIT || + ctx.options().rust_features().larger_arrays + { + quote_equals(name_ident) + } else { + quote! { + &self. #name_ident [..] == &other. #name_ident [..] + } + } + } + TypeKind::Vector(_, len) => { + let self_ids = 0..len; + let other_ids = 0..len; + quote! { + #(self.#self_ids == other.#other_ids &&)* true + } + } + + TypeKind::ResolvedTypeRef(t) | + TypeKind::TemplateAlias(t, _) | + TypeKind::Alias(t) | + TypeKind::BlockPointer(t) => { + let inner_item = ctx.resolve_item(t); + gen_field(ctx, inner_item, name) + } + } +} diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/bindgen/codegen/mod.rs libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/bindgen/codegen/mod.rs --- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/bindgen/codegen/mod.rs 1970-01-01 00:00:00.000000000 +0000 +++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/bindgen/codegen/mod.rs 2023-08-25 21:18:50.000000000 +0000 @@ -0,0 +1,5366 @@ +mod dyngen; +mod error; +mod helpers; +mod impl_debug; +mod impl_partialeq; +mod postprocessing; +mod serialize; +pub(crate) mod struct_layout; + +#[cfg(test)] +#[allow(warnings)] +pub(crate) mod bitfield_unit; +#[cfg(all(test, target_endian = "little"))] +mod bitfield_unit_tests; + +use self::dyngen::DynamicItems; +use self::helpers::attributes; +use self::struct_layout::StructLayoutTracker; + +use super::BindgenOptions; + +use crate::callbacks::{DeriveInfo, TypeKind as DeriveTypeKind}; +use crate::ir::analysis::{HasVtable, Sizedness}; +use crate::ir::annotations::{ + Annotations, FieldAccessorKind, FieldVisibilityKind, +}; +use crate::ir::comp::{ + Bitfield, BitfieldUnit, CompInfo, CompKind, Field, FieldData, FieldMethods, + Method, MethodKind, +}; +use crate::ir::context::{BindgenContext, ItemId}; +use crate::ir::derive::{ + CanDerive, CanDeriveCopy, CanDeriveDebug, CanDeriveDefault, CanDeriveEq, + CanDeriveHash, CanDeriveOrd, CanDerivePartialEq, CanDerivePartialOrd, +}; +use crate::ir::dot; +use crate::ir::enum_ty::{Enum, EnumVariant, EnumVariantValue}; +use crate::ir::function::{ + Abi, ClangAbi, Function, FunctionKind, FunctionSig, Linkage, +}; +use crate::ir::int::IntKind; +use crate::ir::item::{IsOpaque, Item, ItemCanonicalName, ItemCanonicalPath}; +use crate::ir::item_kind::ItemKind; +use crate::ir::layout::Layout; +use crate::ir::module::Module; +use crate::ir::objc::{ObjCInterface, ObjCMethod}; +use crate::ir::template::{ + AsTemplateParam, TemplateInstantiation, TemplateParameters, +}; +use crate::ir::ty::{Type, TypeKind}; +use crate::ir::var::Var; + +use proc_macro2::{self, Ident, Span}; +use quote::TokenStreamExt; + +use crate::{Entry, HashMap, HashSet}; +use std::borrow::Cow; +use std::cell::Cell; +use std::collections::VecDeque; +use std::fmt::{self, Write}; +use std::ops; +use std::str::FromStr; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum CodegenError { + Serialize { msg: String, loc: String }, + Io(String), +} + +impl From for CodegenError { + fn from(err: std::io::Error) -> Self { + 
Self::Io(err.to_string()) + } +} + +impl fmt::Display for CodegenError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Serialize { msg, loc } => { + write!(f, "serialization error at {}: {}", loc, msg) + } + Self::Io(err) => err.fmt(f), + } + } +} + +// Name of type defined in constified enum module +pub(crate) static CONSTIFIED_ENUM_MODULE_REPR_NAME: &str = "Type"; + +fn top_level_path( + ctx: &BindgenContext, + item: &Item, +) -> Vec<proc_macro2::TokenStream> { + let mut path = vec![quote! { self }]; + + if ctx.options().enable_cxx_namespaces { + for _ in 0..item.codegen_depth(ctx) { + path.push(quote! { super }); + } + } + + path +} + +fn root_import( + ctx: &BindgenContext, + module: &Item, +) -> proc_macro2::TokenStream { + assert!(ctx.options().enable_cxx_namespaces, "Somebody messed it up"); + assert!(module.is_module()); + + let mut path = top_level_path(ctx, module); + + let root = ctx.root_module().canonical_name(ctx); + let root_ident = ctx.rust_ident(root); + path.push(quote! { #root_ident }); + + let mut tokens = quote! {}; + tokens.append_separated(path, quote!(::)); + + quote! { + #[allow(unused_imports)] + use #tokens ; + } +} + +bitflags! { + struct DerivableTraits: u16 { + const DEBUG = 1 << 0; + const DEFAULT = 1 << 1; + const COPY = 1 << 2; + const CLONE = 1 << 3; + const HASH = 1 << 4; + const PARTIAL_ORD = 1 << 5; + const ORD = 1 << 6; + const PARTIAL_EQ = 1 << 7; + const EQ = 1 << 8; + } +} + +fn derives_of_item( + item: &Item, + ctx: &BindgenContext, + packed: bool, +) -> DerivableTraits { + let mut derivable_traits = DerivableTraits::empty(); + + let all_template_params = item.all_template_params(ctx); + + if item.can_derive_copy(ctx) && !item.annotations().disallow_copy() { + derivable_traits |= DerivableTraits::COPY; + + if ctx.options().rust_features().builtin_clone_impls || + !all_template_params.is_empty() + { + // FIXME: This requires extra logic if you have a big array in a + // templated struct. The reason for this is that the magic: + // fn clone(&self) -> Self { *self } + // doesn't work for templates. + // + // It's not hard to fix though. + derivable_traits |= DerivableTraits::CLONE; + } + } else if packed { + // If the struct or union is packed, deriving from Copy is required for + // deriving from any other trait.
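// [Editor's aside, not part of the diff] A minimal sketch of the rule enforced by
// the early return below: a packed type that cannot derive Copy derives nothing
// else, while a Copy-able item picks up Clone alongside it. All names here are
// hypothetical stand-ins for the DerivableTraits bookkeeping above.
fn sketch_derives(can_copy: bool, packed: bool) -> Vec<&'static str> {
    let mut derives = Vec::new();
    if can_copy {
        derives.push("Copy");
        derives.push("Clone"); // mirrors the builtin_clone_impls branch
    } else if packed {
        return derives; // packed + no Copy => no further derives at all
    }
    derives.push("Debug"); // the remaining traits are only considered past this point
    derives
}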
+ return derivable_traits; + } + + if item.can_derive_debug(ctx) && !item.annotations().disallow_debug() { + derivable_traits |= DerivableTraits::DEBUG; + } + + if item.can_derive_default(ctx) && !item.annotations().disallow_default() { + derivable_traits |= DerivableTraits::DEFAULT; + } + + if item.can_derive_hash(ctx) { + derivable_traits |= DerivableTraits::HASH; + } + + if item.can_derive_partialord(ctx) { + derivable_traits |= DerivableTraits::PARTIAL_ORD; + } + + if item.can_derive_ord(ctx) { + derivable_traits |= DerivableTraits::ORD; + } + + if item.can_derive_partialeq(ctx) { + derivable_traits |= DerivableTraits::PARTIAL_EQ; + } + + if item.can_derive_eq(ctx) { + derivable_traits |= DerivableTraits::EQ; + } + + derivable_traits +} + +impl From<DerivableTraits> for Vec<&'static str> { + fn from(derivable_traits: DerivableTraits) -> Vec<&'static str> { + [ + (DerivableTraits::DEBUG, "Debug"), + (DerivableTraits::DEFAULT, "Default"), + (DerivableTraits::COPY, "Copy"), + (DerivableTraits::CLONE, "Clone"), + (DerivableTraits::HASH, "Hash"), + (DerivableTraits::PARTIAL_ORD, "PartialOrd"), + (DerivableTraits::ORD, "Ord"), + (DerivableTraits::PARTIAL_EQ, "PartialEq"), + (DerivableTraits::EQ, "Eq"), + ] + .iter() + .filter_map(|&(flag, derive)| { + Some(derive).filter(|_| derivable_traits.contains(flag)) + }) + .collect() + } +} + +struct CodegenResult<'a> { + items: Vec<proc_macro2::TokenStream>, + dynamic_items: DynamicItems, + + /// A monotonic counter used to add stable unique ID's to stuff that doesn't + /// need to be referenced by anything. + codegen_id: &'a Cell<usize>, + + /// Whether a bindgen union has been generated at least once. + saw_bindgen_union: bool, + + /// Whether an incomplete array has been generated at least once. + saw_incomplete_array: bool, + + /// Whether Objective C types have been seen at least once. + saw_objc: bool, + + /// Whether Apple block types have been seen at least once. + saw_block: bool, + + /// Whether a bitfield allocation unit has been seen at least once. + saw_bitfield_unit: bool, + + items_seen: HashSet<ItemId>, + /// The set of generated function/var names, needed because in C/C++ is + /// legal to do something like: + /// + /// ```c++ + /// extern "C" { + /// void foo(); + /// extern int bar; + /// } + /// + /// extern "C" { + /// void foo(); + /// extern int bar; + /// } + /// ``` + /// + /// Being these two different declarations. + functions_seen: HashSet<String>, + vars_seen: HashSet<String>, + + /// Used for making bindings to overloaded functions. Maps from a canonical + /// function name to the number of overloads we have already codegen'd for + /// that name. This lets us give each overload a unique suffix.
+ overload_counters: HashMap<String, u32>, + + items_to_serialize: Vec<ItemId>, +} + +impl<'a> CodegenResult<'a> { + fn new(codegen_id: &'a Cell<usize>) -> Self { + CodegenResult { + items: vec![], + dynamic_items: DynamicItems::new(), + saw_bindgen_union: false, + saw_incomplete_array: false, + saw_objc: false, + saw_block: false, + saw_bitfield_unit: false, + codegen_id, + items_seen: Default::default(), + functions_seen: Default::default(), + vars_seen: Default::default(), + overload_counters: Default::default(), + items_to_serialize: Default::default(), + } + } + + fn dynamic_items(&mut self) -> &mut DynamicItems { + &mut self.dynamic_items + } + + fn saw_bindgen_union(&mut self) { + self.saw_bindgen_union = true; + } + + fn saw_incomplete_array(&mut self) { + self.saw_incomplete_array = true; + } + + fn saw_objc(&mut self) { + self.saw_objc = true; + } + + fn saw_block(&mut self) { + self.saw_block = true; + } + + fn saw_bitfield_unit(&mut self) { + self.saw_bitfield_unit = true; + } + + fn seen<Id: Into<ItemId>>(&self, item: Id) -> bool { + self.items_seen.contains(&item.into()) + } + + fn set_seen<Id: Into<ItemId>>(&mut self, item: Id) { + self.items_seen.insert(item.into()); + } + + fn seen_function(&self, name: &str) -> bool { + self.functions_seen.contains(name) + } + + fn saw_function(&mut self, name: &str) { + self.functions_seen.insert(name.into()); + } + + /// Get the overload number for the given function name. Increments the + /// counter internally so the next time we ask for the overload for this + /// name, we get the incremented value, and so on. + fn overload_number(&mut self, name: &str) -> u32 { + let counter = self.overload_counters.entry(name.into()).or_insert(0); + let number = *counter; + *counter += 1; + number + } + + fn seen_var(&self, name: &str) -> bool { + self.vars_seen.contains(name) + } + + fn saw_var(&mut self, name: &str) { + self.vars_seen.insert(name.into()); + } + + fn inner<F>(&mut self, cb: F) -> Vec<proc_macro2::TokenStream> + where + F: FnOnce(&mut Self), + { + let mut new = Self::new(self.codegen_id); + + cb(&mut new); + + self.saw_incomplete_array |= new.saw_incomplete_array; + self.saw_objc |= new.saw_objc; + self.saw_block |= new.saw_block; + self.saw_bitfield_unit |= new.saw_bitfield_unit; + self.saw_bindgen_union |= new.saw_bindgen_union; + + new.items + } +} + +impl<'a> ops::Deref for CodegenResult<'a> { + type Target = Vec<proc_macro2::TokenStream>; + + fn deref(&self) -> &Self::Target { + &self.items + } +} + +impl<'a> ops::DerefMut for CodegenResult<'a> { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.items + } +} + +/// A trait to convert a rust type into a pointer, optionally const, to the same +/// type. +trait ToPtr { + fn to_ptr(self, is_const: bool) -> proc_macro2::TokenStream; +} + +impl ToPtr for proc_macro2::TokenStream { + fn to_ptr(self, is_const: bool) -> proc_macro2::TokenStream { + if is_const { + quote! { *const #self } + } else { + quote! { *mut #self } + } + } +} + +/// An extension trait for `proc_macro2::TokenStream` that lets us append any implicit +/// template parameters that exist for some type, if necessary. +trait AppendImplicitTemplateParams { + fn append_implicit_template_params( + &mut self, + ctx: &BindgenContext, + item: &Item, + ); +} + +impl AppendImplicitTemplateParams for proc_macro2::TokenStream { + fn append_implicit_template_params( + &mut self, + ctx: &BindgenContext, + item: &Item, + ) { + let item = item.id().into_resolver().through_type_refs().resolve(ctx); + + match *item.expect_type().kind() { + TypeKind::UnresolvedTypeRef(..)
=> { + unreachable!("already resolved unresolved type refs") + } + TypeKind::ResolvedTypeRef(..) => { + unreachable!("we resolved item through type refs") + } + + // None of these types ever have implicit template parameters. + TypeKind::Void | + TypeKind::NullPtr | + TypeKind::Pointer(..) | + TypeKind::Reference(..) | + TypeKind::Int(..) | + TypeKind::Float(..) | + TypeKind::Complex(..) | + TypeKind::Array(..) | + TypeKind::TypeParam | + TypeKind::Opaque | + TypeKind::Function(..) | + TypeKind::Enum(..) | + TypeKind::ObjCId | + TypeKind::ObjCSel | + TypeKind::TemplateInstantiation(..) => return, + _ => {} + } + + let params: Vec<_> = item + .used_template_params(ctx) + .iter() + .map(|p| { + p.try_to_rust_ty(ctx, &()) + .expect("template params cannot fail to be a rust type") + }) + .collect(); + if !params.is_empty() { + self.append_all(quote! { + < #( #params ),* > + }); + } + } +} + +trait CodeGenerator { + /// Extra information from the caller. + type Extra; + + /// Extra information returned to the caller. + type Return; + + fn codegen( + &self, + ctx: &BindgenContext, + result: &mut CodegenResult<'_>, + extra: &Self::Extra, + ) -> Self::Return; +} + +impl Item { + fn process_before_codegen( + &self, + ctx: &BindgenContext, + result: &mut CodegenResult, + ) -> bool { + if !self.is_enabled_for_codegen(ctx) { + return false; + } + + if self.is_blocklisted(ctx) || result.seen(self.id()) { + debug!( + "<Item as CodeGenerator>::process_before_codegen: Ignoring hidden or seen: \ + self = {:?}", + self + ); + return false; + } + + if !ctx.codegen_items().contains(&self.id()) { + // TODO(emilio, #453): Figure out what to do when this happens + // legitimately, we could track the opaque stuff and disable the + // assertion there I guess. + warn!("Found non-allowlisted item in code generation: {:?}", self); + } + + result.set_seen(self.id()); + true + } +} + +impl CodeGenerator for Item { + type Extra = (); + type Return = (); + + fn codegen( + &self, + ctx: &BindgenContext, + result: &mut CodegenResult<'_>, + _extra: &(), + ) { + debug!("<Item as CodeGenerator>::codegen: self = {:?}", self); + if !self.process_before_codegen(ctx, result) { + return; + } + + match *self.kind() { + ItemKind::Module(ref module) => { + module.codegen(ctx, result, self); + } + ItemKind::Function(ref fun) => { + fun.codegen(ctx, result, self); + } + ItemKind::Var(ref var) => { + var.codegen(ctx, result, self); + } + ItemKind::Type(ref ty) => { + ty.codegen(ctx, result, self); + } + } + } +} + +impl CodeGenerator for Module { + type Extra = Item; + type Return = (); + + fn codegen( + &self, + ctx: &BindgenContext, + result: &mut CodegenResult<'_>, + item: &Item, + ) { + debug!("<Module as CodeGenerator>::codegen: item = {:?}", item); + + let codegen_self = |result: &mut CodegenResult, + found_any: &mut bool| { + for child in self.children() { + if ctx.codegen_items().contains(child) { + *found_any = true; + ctx.resolve_item(*child).codegen(ctx, result, &()); + } + } + + if item.id() == ctx.root_module() { + if result.saw_block { + utils::prepend_block_header(ctx, &mut *result); + } + if result.saw_bindgen_union { + utils::prepend_union_types(ctx, &mut *result); + } + if result.saw_incomplete_array { + utils::prepend_incomplete_array_types(ctx, &mut *result); + } + if ctx.need_bindgen_complex_type() { + utils::prepend_complex_type(&mut *result); + } + if result.saw_objc { + utils::prepend_objc_header(ctx, &mut *result); + } + if result.saw_bitfield_unit { + utils::prepend_bitfield_unit_type(ctx, &mut *result); + } + } + }; + + if !ctx.options().enable_cxx_namespaces ||
(self.is_inline() && + !ctx.options().conservative_inline_namespaces) + { + codegen_self(result, &mut false); + return; + } + + let mut found_any = false; + let inner_items = result.inner(|result| { + result.push(root_import(ctx, item)); + + let path = item.namespace_aware_canonical_path(ctx).join("::"); + if let Some(raw_lines) = ctx.options().module_lines.get(&path) { + for raw_line in raw_lines { + found_any = true; + result.push( + proc_macro2::TokenStream::from_str(raw_line).unwrap(), + ); + } + } + + codegen_self(result, &mut found_any); + }); + + // Don't bother creating an empty module. + if !found_any { + return; + } + + let name = item.canonical_name(ctx); + let ident = ctx.rust_ident(name); + result.push(if item.id() == ctx.root_module() { + quote! { + #[allow(non_snake_case, non_camel_case_types, non_upper_case_globals)] + pub mod #ident { + #( #inner_items )* + } + } + } else { + quote! { + pub mod #ident { + #( #inner_items )* + } + } + }); + } +} + +impl CodeGenerator for Var { + type Extra = Item; + type Return = (); + + fn codegen( + &self, + ctx: &BindgenContext, + result: &mut CodegenResult<'_>, + item: &Item, + ) { + use crate::ir::var::VarType; + debug!("<Var as CodeGenerator>::codegen: item = {:?}", item); + debug_assert!(item.is_enabled_for_codegen(ctx)); + + let canonical_name = item.canonical_name(ctx); + + if result.seen_var(&canonical_name) { + return; + } + result.saw_var(&canonical_name); + + let canonical_ident = ctx.rust_ident(&canonical_name); + + // We can't generate bindings to static variables of templates. The + // number of actual variables for a single declaration are open ended + // and we don't know what instantiations do or don't exist. + if !item.all_template_params(ctx).is_empty() { + return; + } + + let mut attrs = vec![]; + if let Some(comment) = item.comment(ctx) { + attrs.push(attributes::doc(comment)); + } + + let ty = self.ty().to_rust_ty_or_opaque(ctx, &()); + + if let Some(val) = self.val() { + match *val { + VarType::Bool(val) => { + result.push(quote! { + #(#attrs)* + pub const #canonical_ident : #ty = #val ; + }); + } + VarType::Int(val) => { + let int_kind = self + .ty() + .into_resolver() + .through_type_aliases() + .through_type_refs() + .resolve(ctx) + .expect_type() + .as_integer() + .unwrap(); + let val = if int_kind.is_signed() { + helpers::ast_ty::int_expr(val) + } else { + helpers::ast_ty::uint_expr(val as _) + }; + result.push(quote! { + #(#attrs)* + pub const #canonical_ident : #ty = #val ; + }); + } + VarType::String(ref bytes) => { + // Account the trailing zero. + // + // TODO: Here we ignore the type we just made up, probably + // we should refactor how the variable type and ty ID work. + let len = bytes.len() + 1; + let ty = quote! { + [u8; #len] + }; + + match String::from_utf8(bytes.clone()) { + Ok(string) => { + let cstr = helpers::ast_ty::cstr_expr(string); + if ctx + .options() + .rust_features + .static_lifetime_elision + { + result.push(quote! { + #(#attrs)* + pub const #canonical_ident : &#ty = #cstr ; + }); + } else { + result.push(quote! { + #(#attrs)* + pub const #canonical_ident : &'static #ty = #cstr ; + }); + } + } + Err(..) => { + let bytes = helpers::ast_ty::byte_array_expr(bytes); + result.push(quote! { + #(#attrs)* + pub const #canonical_ident : #ty = #bytes ; + }); + } + } + } + VarType::Float(f) => { + if let Ok(expr) = helpers::ast_ty::float_expr(ctx, f) { + result.push(quote! { + #(#attrs)* + pub const #canonical_ident : #ty = #expr ; + }); + } + } + VarType::Char(c) => { + result.push(quote!
{ + #(#attrs)* + pub const #canonical_ident : #ty = #c ; + }); + } + } + } else { + // If necessary, apply a `#[link_name]` attribute + if let Some(link_name) = self.link_name() { + attrs.push(attributes::link_name::<false>(link_name)); + } else { + let link_name = + self.mangled_name().unwrap_or_else(|| self.name()); + if !utils::names_will_be_identical_after_mangling( + &canonical_name, + link_name, + None, + ) { + attrs.push(attributes::link_name::<false>(link_name)); + } + } + + let maybe_mut = if self.is_const() { + quote! {} + } else { + quote! { mut } + }; + + let tokens = quote!( + extern "C" { + #(#attrs)* + pub static #maybe_mut #canonical_ident: #ty; + } + ); + + result.push(tokens); + } + } +} + +impl CodeGenerator for Type { + type Extra = Item; + type Return = (); + + fn codegen( + &self, + ctx: &BindgenContext, + result: &mut CodegenResult<'_>, + item: &Item, + ) { + debug!("<Type as CodeGenerator>::codegen: item = {:?}", item); + debug_assert!(item.is_enabled_for_codegen(ctx)); + + match *self.kind() { + TypeKind::Void | + TypeKind::NullPtr | + TypeKind::Int(..) | + TypeKind::Float(..) | + TypeKind::Complex(..) | + TypeKind::Array(..) | + TypeKind::Vector(..) | + TypeKind::Pointer(..) | + TypeKind::Reference(..) | + TypeKind::Function(..) | + TypeKind::ResolvedTypeRef(..) | + TypeKind::Opaque | + TypeKind::TypeParam => { + // These items don't need code generation, they only need to be + // converted to rust types in fields, arguments, and such. + // NOTE(emilio): If you add to this list, make sure to also add + // it to BindgenContext::compute_allowlisted_and_codegen_items. + } + TypeKind::TemplateInstantiation(ref inst) => { + inst.codegen(ctx, result, item) + } + TypeKind::BlockPointer(inner) => { + if !ctx.options().generate_block { + return; + } + + let inner_item = + inner.into_resolver().through_type_refs().resolve(ctx); + let name = item.canonical_name(ctx); + + let inner_rust_type = { + if let TypeKind::Function(fnsig) = + inner_item.kind().expect_type().kind() + { + utils::fnsig_block(ctx, fnsig) + } else { + panic!("invalid block typedef: {:?}", inner_item) + } + }; + + let rust_name = ctx.rust_ident(name); + + let mut tokens = if let Some(comment) = item.comment(ctx) { + attributes::doc(comment) + } else { + quote! {} + }; + + tokens.append_all(quote! { + pub type #rust_name = #inner_rust_type ; + }); + + result.push(tokens); + result.saw_block(); + } + TypeKind::Comp(ref ci) => ci.codegen(ctx, result, item), + TypeKind::TemplateAlias(inner, _) | TypeKind::Alias(inner) => { + let inner_item = + inner.into_resolver().through_type_refs().resolve(ctx); + let name = item.canonical_name(ctx); + let path = item.canonical_path(ctx); + + { + let through_type_aliases = inner + .into_resolver() + .through_type_refs() + .through_type_aliases() + .resolve(ctx); + + // Try to catch the common pattern: + // + // typedef struct foo { ... } foo; + // + // here, and also other more complex cases like #946. + if through_type_aliases.canonical_path(ctx) == path { + return; + } + } + + // If this is a known named type, disallow generating anything + // for it too. If size_t -> usize conversions are enabled, we + // need to check that these conversions are permissible, but + // nothing needs to be generated, still.
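// [Editor's aside, not part of the diff] The size_t/ssize_t assertions that follow
// encode when the size_t -> usize conversion is sound; a standalone restatement of
// the condition, with hypothetical inputs:
fn size_t_is_usize(size: usize, align: usize, target_pointer_size: usize) -> bool {
    // Both the size and the alignment of size_t must equal the target pointer
    // size; otherwise bindgen expects `--no-size_t-is-usize`.
    size == target_pointer_size && align == target_pointer_size
}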
+ let spelling = self.name().expect("Unnamed alias?"); + if utils::type_from_named(ctx, spelling).is_some() { + if let "size_t" | "ssize_t" = spelling { + let layout = inner_item + .kind() + .expect_type() + .layout(ctx) + .expect("No layout?"); + assert_eq!( + layout.size, + ctx.target_pointer_size(), + "Target platform requires `--no-size_t-is-usize`. The size of `{}` ({}) does not match the target pointer size ({})", + spelling, + layout.size, + ctx.target_pointer_size(), + ); + assert_eq!( + layout.align, + ctx.target_pointer_size(), + "Target platform requires `--no-size_t-is-usize`. The alignment of `{}` ({}) does not match the target pointer size ({})", + spelling, + layout.align, + ctx.target_pointer_size(), + ); + } + return; + } + + let mut outer_params = item.used_template_params(ctx); + + let is_opaque = item.is_opaque(ctx, &()); + let inner_rust_type = if is_opaque { + outer_params = vec![]; + self.to_opaque(ctx, item) + } else { + // Its possible that we have better layout information than + // the inner type does, so fall back to an opaque blob based + // on our layout if converting the inner item fails. + let mut inner_ty = inner_item + .try_to_rust_ty_or_opaque(ctx, &()) + .unwrap_or_else(|_| self.to_opaque(ctx, item)); + inner_ty.append_implicit_template_params(ctx, inner_item); + inner_ty + }; + + { + // FIXME(emilio): This is a workaround to avoid generating + // incorrect type aliases because of types that we haven't + // been able to resolve (because, eg, they depend on a + // template parameter). + // + // It's kind of a shame not generating them even when they + // could be referenced, but we already do the same for items + // with invalid template parameters, and at least this way + // they can be replaced, instead of generating plain invalid + // code. + let inner_canon_type = + inner_item.expect_type().canonical_type(ctx); + if inner_canon_type.is_invalid_type_param() { + warn!( + "Item contained invalid named type, skipping: \ + {:?}, {:?}", + item, inner_item + ); + return; + } + } + + let rust_name = ctx.rust_ident(&name); + + let mut tokens = if let Some(comment) = item.comment(ctx) { + attributes::doc(comment) + } else { + quote! {} + }; + + let alias_style = if ctx.options().type_alias.matches(&name) { + AliasVariation::TypeAlias + } else if ctx.options().new_type_alias.matches(&name) { + AliasVariation::NewType + } else if ctx.options().new_type_alias_deref.matches(&name) { + AliasVariation::NewTypeDeref + } else { + ctx.options().default_alias_style + }; + + // We prefer using `pub use` over `pub type` because of: + // https://github.com/rust-lang/rust/issues/26264 + // These are the only characters allowed in simple + // paths, eg `good::dogs::Bront`. + if inner_rust_type.to_string().chars().all(|c| matches!(c, 'A'..='Z' | 'a'..='z' | '0'..='9' | ':' | '_' | ' ')) && outer_params.is_empty() && + !is_opaque && + alias_style == AliasVariation::TypeAlias && + inner_item.expect_type().canonical_type(ctx).is_enum() + { + tokens.append_all(quote! { + pub use + }); + let path = top_level_path(ctx, item); + tokens.append_separated(path, quote!(::)); + tokens.append_all(quote! { + :: #inner_rust_type as #rust_name ; + }); + result.push(tokens); + return; + } + + tokens.append_all(match alias_style { + AliasVariation::TypeAlias => quote! 
{ + pub type #rust_name + }, + AliasVariation::NewType | AliasVariation::NewTypeDeref => { + assert!( + ctx.options().rust_features().repr_transparent, + "repr_transparent feature is required to use {:?}", + alias_style + ); + + let mut attributes = + vec![attributes::repr("transparent")]; + let packed = false; // Types can't be packed in Rust. + let derivable_traits = + derives_of_item(item, ctx, packed); + if !derivable_traits.is_empty() { + let derives: Vec<_> = derivable_traits.into(); + attributes.push(attributes::derives(&derives)) + } + + quote! { + #( #attributes )* + pub struct #rust_name + } + } + }); + + let params: Vec<_> = outer_params + .into_iter() + .filter_map(|p| p.as_template_param(ctx, &())) + .collect(); + if params + .iter() + .any(|p| ctx.resolve_type(*p).is_invalid_type_param()) + { + warn!( + "Item contained invalid template \ + parameter: {:?}", + item + ); + return; + } + let params: Vec<_> = params + .iter() + .map(|p| { + p.try_to_rust_ty(ctx, &()).expect( + "type parameters can always convert to rust ty OK", + ) + }) + .collect(); + + if !params.is_empty() { + tokens.append_all(quote! { + < #( #params ),* > + }); + } + + let access_spec = + access_specifier(ctx.options().default_visibility); + tokens.append_all(match alias_style { + AliasVariation::TypeAlias => quote! { + = #inner_rust_type ; + }, + AliasVariation::NewType | AliasVariation::NewTypeDeref => { + quote! { + (#access_spec #inner_rust_type) ; + } + } + }); + + if alias_style == AliasVariation::NewTypeDeref { + let prefix = ctx.trait_prefix(); + tokens.append_all(quote! { + impl ::#prefix::ops::Deref for #rust_name { + type Target = #inner_rust_type; + #[inline] + fn deref(&self) -> &Self::Target { + &self.0 + } + } + impl ::#prefix::ops::DerefMut for #rust_name { + #[inline] + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } + } + }); + } + + result.push(tokens); + } + TypeKind::Enum(ref ei) => ei.codegen(ctx, result, item), + TypeKind::ObjCId | TypeKind::ObjCSel => { + result.saw_objc(); + } + TypeKind::ObjCInterface(ref interface) => { + interface.codegen(ctx, result, item) + } + ref u @ TypeKind::UnresolvedTypeRef(..) => { + unreachable!("Should have been resolved after parsing {:?}!", u) + } + } + } +} + +struct Vtable<'a> { + item_id: ItemId, + /// A reference to the originating compound object. + #[allow(dead_code)] + comp_info: &'a CompInfo, +} + +impl<'a> Vtable<'a> { + fn new(item_id: ItemId, comp_info: &'a CompInfo) -> Self { + Vtable { item_id, comp_info } + } +} + +impl<'a> CodeGenerator for Vtable<'a> { + type Extra = Item; + type Return = (); + + fn codegen( + &self, + ctx: &BindgenContext, + result: &mut CodegenResult<'_>, + item: &Item, + ) { + assert_eq!(item.id(), self.item_id); + debug_assert!(item.is_enabled_for_codegen(ctx)); + let name = ctx.rust_ident(self.canonical_name(ctx)); + + // For now, we will only generate vtables for classes that: + // - do not inherit from others (compilers merge VTable from primary parent class). + // - do not contain a virtual destructor (requires ordering; platforms generate different vtables). 
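// [Editor's aside, not part of the diff] For a hypothetical class `Foo` with one
// virtual method `int get() const`, no base classes, and no destructor, the
// supported branch below emits a vtable struct shaped roughly like this sketch
// (all names here are made up for illustration):
#[repr(C)]
pub struct Foo {
    pub vtable_: *const Foo__bindgen_vtable,
}
#[repr(C)]
pub struct Foo__bindgen_vtable {
    pub Foo_get: unsafe extern "C" fn(this: *const Foo) -> ::std::os::raw::c_int,
}
// Unsupported cases instead fall back to an opaque `pub struct Name(c_void);`.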
+ if ctx.options().vtable_generation && + self.comp_info.base_members().is_empty() && + self.comp_info.destructor().is_none() + { + let class_ident = ctx.rust_ident(self.item_id.canonical_name(ctx)); + + let methods = self + .comp_info + .methods() + .iter() + .filter_map(|m| { + if !m.is_virtual() { + return None; + } + + let function_item = ctx.resolve_item(m.signature()); + let function = function_item.expect_function(); + let signature_item = ctx.resolve_item(function.signature()); + let signature = match signature_item.expect_type().kind() { + TypeKind::Function(ref sig) => sig, + _ => panic!("Function signature type mismatch"), + }; + + // FIXME: Is there a canonical name without the class prepended? + let function_name = function_item.canonical_name(ctx); + + // FIXME: Need to account for overloading with times_seen (separately from regular function path). + let function_name = ctx.rust_ident(function_name); + let mut args = utils::fnsig_arguments(ctx, signature); + let ret = utils::fnsig_return_ty(ctx, signature); + + args[0] = if m.is_const() { + quote! { this: *const #class_ident } + } else { + quote! { this: *mut #class_ident } + }; + + Some(quote! { + pub #function_name : unsafe extern "C" fn( #( #args ),* ) #ret + }) + }) + .collect::<Vec<_>>(); + + result.push(quote! { + #[repr(C)] + pub struct #name { + #( #methods ),* + } + }) + } else { + // For the cases we don't support, simply generate an empty struct. + let void = helpers::ast_ty::c_void(ctx); + + result.push(quote! { + #[repr(C)] + pub struct #name ( #void ); + }); + } + } +} + +impl<'a> ItemCanonicalName for Vtable<'a> { + fn canonical_name(&self, ctx: &BindgenContext) -> String { + format!("{}__bindgen_vtable", self.item_id.canonical_name(ctx)) + } +} + +impl<'a> TryToRustTy for Vtable<'a> { + type Extra = (); + + fn try_to_rust_ty( + &self, + ctx: &BindgenContext, + _: &(), + ) -> error::Result<proc_macro2::TokenStream> { + let name = ctx.rust_ident(self.canonical_name(ctx)); + Ok(quote! { + #name + }) + } +} + +impl CodeGenerator for TemplateInstantiation { + type Extra = Item; + type Return = (); + + fn codegen( + &self, + ctx: &BindgenContext, + result: &mut CodegenResult<'_>, + item: &Item, + ) { + debug_assert!(item.is_enabled_for_codegen(ctx)); + + // Although uses of instantiations don't need code generation, and are + // just converted to rust types in fields, vars, etc, we take this + // opportunity to generate tests for their layout here. If the + // instantiation is opaque, then its presumably because we don't + // properly understand it (maybe because of specializations), and so we + // shouldn't emit layout tests either. + if !ctx.options().layout_tests || self.is_opaque(ctx, item) { + return; + } + + // If there are any unbound type parameters, then we can't generate a + // layout test because we aren't dealing with a concrete type with a + // concrete size and alignment. + if ctx.uses_any_template_parameters(item.id()) { + return; + } + + let layout = item.kind().expect_type().layout(ctx); + + if let Some(layout) = layout { + let size = layout.size; + let align = layout.align; + + let name = item.full_disambiguated_name(ctx); + let mut fn_name = + format!("__bindgen_test_layout_{}_instantiation", name); + let times_seen = result.overload_number(&fn_name); + if times_seen > 0 { + write!(&mut fn_name, "_{}", times_seen).unwrap(); + } + + let fn_name = ctx.rust_ident_raw(fn_name); + + let prefix = ctx.trait_prefix(); + let ident = item.to_rust_ty_or_opaque(ctx, &()); + let size_of_expr = quote!
{ + ::#prefix::mem::size_of::<#ident>() + }; + let align_of_expr = quote! { + ::#prefix::mem::align_of::<#ident>() + }; + + let item = quote! { + #[test] + fn #fn_name() { + assert_eq!(#size_of_expr, #size, + concat!("Size of template specialization: ", + stringify!(#ident))); + assert_eq!(#align_of_expr, #align, + concat!("Alignment of template specialization: ", + stringify!(#ident))); + } + }; + + result.push(item); + } + } +} + +/// Trait for implementing the code generation of a struct or union field. +trait FieldCodegen<'a> { + type Extra; + + #[allow(clippy::too_many_arguments)] + fn codegen<F, M>( + &self, + ctx: &BindgenContext, + visibility_kind: FieldVisibilityKind, + accessor_kind: FieldAccessorKind, + parent: &CompInfo, + result: &mut CodegenResult, + struct_layout: &mut StructLayoutTracker, + fields: &mut F, + methods: &mut M, + extra: Self::Extra, + ) where + F: Extend<proc_macro2::TokenStream>, + M: Extend<proc_macro2::TokenStream>; +} + +impl<'a> FieldCodegen<'a> for Field { + type Extra = (); + + fn codegen<F, M>( + &self, + ctx: &BindgenContext, + visibility_kind: FieldVisibilityKind, + accessor_kind: FieldAccessorKind, + parent: &CompInfo, + result: &mut CodegenResult, + struct_layout: &mut StructLayoutTracker, + fields: &mut F, + methods: &mut M, + _: (), + ) where + F: Extend<proc_macro2::TokenStream>, + M: Extend<proc_macro2::TokenStream>, + { + match *self { + Field::DataMember(ref data) => { + data.codegen( + ctx, + visibility_kind, + accessor_kind, + parent, + result, + struct_layout, + fields, + methods, + (), + ); + } + Field::Bitfields(ref unit) => { + unit.codegen( + ctx, + visibility_kind, + accessor_kind, + parent, + result, + struct_layout, + fields, + methods, + (), + ); + } + } + } +} + +fn wrap_union_field_if_needed( + ctx: &BindgenContext, + struct_layout: &StructLayoutTracker, + ty: proc_macro2::TokenStream, + result: &mut CodegenResult, +) -> proc_macro2::TokenStream { + if struct_layout.is_rust_union() { + if struct_layout.can_copy_union_fields() { + ty + } else { + let prefix = ctx.trait_prefix(); + quote! { + ::#prefix::mem::ManuallyDrop<#ty> + } + } + } else { + result.saw_bindgen_union(); + if ctx.options().enable_cxx_namespaces { + quote! { + root::__BindgenUnionField<#ty> + } + } else { + quote! { + __BindgenUnionField<#ty> + } + } + } +} + +impl<'a> FieldCodegen<'a> for FieldData { + type Extra = (); + + fn codegen<F, M>( + &self, + ctx: &BindgenContext, + parent_visibility_kind: FieldVisibilityKind, + accessor_kind: FieldAccessorKind, + parent: &CompInfo, + result: &mut CodegenResult, + struct_layout: &mut StructLayoutTracker, + fields: &mut F, + methods: &mut M, + _: (), + ) where + F: Extend<proc_macro2::TokenStream>, + M: Extend<proc_macro2::TokenStream>, + { + // Bitfields are handled by `FieldCodegen` implementations for + // `BitfieldUnit` and `Bitfield`. + assert!(self.bitfield_width().is_none()); + + let field_item = + self.ty().into_resolver().through_type_refs().resolve(ctx); + let field_ty = field_item.expect_type(); + let mut ty = self.ty().to_rust_ty_or_opaque(ctx, &()); + ty.append_implicit_template_params(ctx, field_item); + + // NB: If supported, we use proper `union` types. + let ty = if parent.is_union() { + wrap_union_field_if_needed(ctx, struct_layout, ty, result) + } else if let Some(item) = field_ty.is_incomplete_array(ctx) { + result.saw_incomplete_array(); + + let inner = item.to_rust_ty_or_opaque(ctx, &()); + + if ctx.options().enable_cxx_namespaces { + quote! { + root::__IncompleteArrayField<#inner> + } + } else { + quote! { + __IncompleteArrayField<#inner> + } + } + } else { + ty + }; + + let mut field = quote!
{}; + if ctx.options().generate_comments { + if let Some(raw_comment) = self.comment() { + let comment = ctx.options().process_comment(raw_comment); + field = attributes::doc(comment); + } + } + + let field_name = self + .name() + .map(|name| ctx.rust_mangle(name).into_owned()) + .expect("Each field should have a name in codegen!"); + let field_ident = ctx.rust_ident_raw(field_name.as_str()); + + if let Some(padding_field) = + struct_layout.saw_field(&field_name, field_ty, self.offset()) + { + fields.extend(Some(padding_field)); + } + + let visibility = compute_visibility( + ctx, + self.is_public(), + Some(self.annotations()), + parent_visibility_kind, + ); + let accessor_kind = + self.annotations().accessor_kind().unwrap_or(accessor_kind); + + match visibility { + FieldVisibilityKind::Private => { + field.append_all(quote! { + #field_ident : #ty , + }); + } + FieldVisibilityKind::PublicCrate => { + field.append_all(quote! { + pub(crate) #field_ident : #ty , + }); + } + FieldVisibilityKind::Public => { + field.append_all(quote! { + pub #field_ident : #ty , + }); + } + } + + fields.extend(Some(field)); + + // TODO: Factor the following code out, please! + if accessor_kind == FieldAccessorKind::None { + return; + } + + let getter_name = ctx.rust_ident_raw(format!("get_{}", field_name)); + let mutable_getter_name = + ctx.rust_ident_raw(format!("get_{}_mut", field_name)); + let field_name = ctx.rust_ident_raw(field_name); + + methods.extend(Some(match accessor_kind { + FieldAccessorKind::None => unreachable!(), + FieldAccessorKind::Regular => { + quote! { + #[inline] + pub fn #getter_name(&self) -> & #ty { + &self.#field_name + } + + #[inline] + pub fn #mutable_getter_name(&mut self) -> &mut #ty { + &mut self.#field_name + } + } + } + FieldAccessorKind::Unsafe => { + quote! { + #[inline] + pub unsafe fn #getter_name(&self) -> & #ty { + &self.#field_name + } + + #[inline] + pub unsafe fn #mutable_getter_name(&mut self) -> &mut #ty { + &mut self.#field_name + } + } + } + FieldAccessorKind::Immutable => { + quote! { + #[inline] + pub fn #getter_name(&self) -> & #ty { + &self.#field_name + } + } + } + })); + } +} + +impl BitfieldUnit { + /// Get the constructor name for this bitfield unit. + fn ctor_name(&self) -> proc_macro2::TokenStream { + let ctor_name = Ident::new( + &format!("new_bitfield_{}", self.nth()), + Span::call_site(), + ); + quote! { + #ctor_name + } + } +} + +impl Bitfield { + /// Extend an under construction bitfield unit constructor with this + /// bitfield. This sets the relevant bits on the `__bindgen_bitfield_unit` + /// variable that's being constructed. + fn extend_ctor_impl( + &self, + ctx: &BindgenContext, + param_name: proc_macro2::TokenStream, + mut ctor_impl: proc_macro2::TokenStream, + ) -> proc_macro2::TokenStream { + let bitfield_ty = ctx.resolve_type(self.ty()); + let bitfield_ty_layout = bitfield_ty + .layout(ctx) + .expect("Bitfield without layout? Gah!"); + let bitfield_int_ty = helpers::integer_type(ctx, bitfield_ty_layout) + .expect( + "Should already have verified that the bitfield is \ + representable as an int", + ); + + let offset = self.offset_into_unit(); + let width = self.width() as u8; + let prefix = ctx.trait_prefix(); + + ctor_impl.append_all(quote! 
{ + __bindgen_bitfield_unit.set( + #offset, + #width, + { + let #param_name: #bitfield_int_ty = unsafe { + ::#prefix::mem::transmute(#param_name) + }; + #param_name as u64 + } + ); + }); + + ctor_impl + } +} + +fn access_specifier( + visibility: FieldVisibilityKind, +) -> proc_macro2::TokenStream { + match visibility { + FieldVisibilityKind::Private => quote! {}, + FieldVisibilityKind::PublicCrate => quote! { pub(crate) }, + FieldVisibilityKind::Public => quote! { pub }, + } +} + +/// Compute a fields or structs visibility based on multiple conditions. +/// 1. If the element was declared public, and we respect such CXX accesses specs +/// (context option) => By default Public, but this can be overruled by an `annotation`. +/// +/// 2. If the element was declared private, and we respect such CXX accesses specs +/// (context option) => By default Private, but this can be overruled by an `annotation`. +/// +/// 3. If we do not respect visibility modifiers, the result depends on the `annotation`, +/// if any, or the passed `default_kind`. +/// +fn compute_visibility( + ctx: &BindgenContext, + is_declared_public: bool, + annotations: Option<&Annotations>, + default_kind: FieldVisibilityKind, +) -> FieldVisibilityKind { + match ( + is_declared_public, + ctx.options().respect_cxx_access_specs, + annotations.and_then(|e| e.visibility_kind()), + ) { + (true, true, annotated_visibility) => { + // declared as public, cxx specs are respected + annotated_visibility.unwrap_or(FieldVisibilityKind::Public) + } + (false, true, annotated_visibility) => { + // declared as private, cxx specs are respected + annotated_visibility.unwrap_or(FieldVisibilityKind::Private) + } + (_, false, annotated_visibility) => { + // cxx specs are not respected, declaration does not matter. + annotated_visibility.unwrap_or(default_kind) + } + } +} + +impl<'a> FieldCodegen<'a> for BitfieldUnit { + type Extra = (); + + fn codegen<F, M>( + &self, + ctx: &BindgenContext, + visibility_kind: FieldVisibilityKind, + accessor_kind: FieldAccessorKind, + parent: &CompInfo, + result: &mut CodegenResult, + struct_layout: &mut StructLayoutTracker, + fields: &mut F, + methods: &mut M, + _: (), + ) where + F: Extend<proc_macro2::TokenStream>, + M: Extend<proc_macro2::TokenStream>, + { + use crate::ir::ty::RUST_DERIVE_IN_ARRAY_LIMIT; + + result.saw_bitfield_unit(); + + let layout = self.layout(); + let unit_field_ty = helpers::bitfield_unit(ctx, layout); + let field_ty = { + let unit_field_ty = unit_field_ty.clone(); + if parent.is_union() { + wrap_union_field_if_needed( + ctx, + struct_layout, + unit_field_ty, + result, + ) + } else { + unit_field_ty + } + }; + + { + let align_field_name = format!("_bitfield_align_{}", self.nth()); + let align_field_ident = ctx.rust_ident(align_field_name); + let align_ty = match self.layout().align { + n if n >= 8 => quote! { u64 }, + 4 => quote! { u32 }, + 2 => quote! { u16 }, + _ => quote! { u8 }, + }; + let access_spec = access_specifier(visibility_kind); + let align_field = quote! { + #access_spec #align_field_ident: [#align_ty; 0], + }; + fields.extend(Some(align_field)); + } + + let unit_field_name = format!("_bitfield_{}", self.nth()); + let unit_field_ident = ctx.rust_ident(&unit_field_name); + + let ctor_name = self.ctor_name(); + let mut ctor_params = vec![]; + let mut ctor_impl = quote! {}; + + // We cannot generate any constructor if the underlying storage can't + // implement AsRef<[u8]> / AsMut<[u8]> / etc, or can't derive Default. + // + // We don't check `larger_arrays` here because Default does still have + // the 32 items limitation.
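// [Editor's aside, not part of the diff] The constructor assembled below ORs each
// bitfield into the allocation unit at its (offset, width) pair; with a plain u8
// standing in for __BindgenBitfieldUnit, the emitted new_bitfield_N is morally:
fn sketch_new_bitfield_1(a: u8, b: u8) -> u8 {
    // hypothetical layout: `a` at offset 0, width 3; `b` at offset 3, width 5
    (a & 0b111) | ((b & 0b1_1111) << 3)
}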
+ let mut generate_ctor = layout.size <= RUST_DERIVE_IN_ARRAY_LIMIT; + + let mut all_fields_declared_as_public = true; + for bf in self.bitfields() { + // Codegen not allowed for anonymous bitfields + if bf.name().is_none() { + continue; + } + + if layout.size > RUST_DERIVE_IN_ARRAY_LIMIT && + !ctx.options().rust_features().larger_arrays + { + continue; + } + + all_fields_declared_as_public &= bf.is_public(); + let mut bitfield_representable_as_int = true; + bf.codegen( + ctx, + visibility_kind, + accessor_kind, + parent, + result, + struct_layout, + fields, + methods, + (&unit_field_name, &mut bitfield_representable_as_int), + ); + + // Generating a constructor requires the bitfield to be representable as an integer. + if !bitfield_representable_as_int { + generate_ctor = false; + continue; + } + + let param_name = bitfield_getter_name(ctx, bf); + let bitfield_ty_item = ctx.resolve_item(bf.ty()); + let bitfield_ty = bitfield_ty_item.expect_type(); + let bitfield_ty = + bitfield_ty.to_rust_ty_or_opaque(ctx, bitfield_ty_item); + + ctor_params.push(quote! { + #param_name : #bitfield_ty + }); + ctor_impl = bf.extend_ctor_impl(ctx, param_name, ctor_impl); + } + + let visibility_kind = compute_visibility( + ctx, + all_fields_declared_as_public, + None, + visibility_kind, + ); + let access_spec = access_specifier(visibility_kind); + + let field = quote! { + #access_spec #unit_field_ident : #field_ty , + }; + fields.extend(Some(field)); + + if generate_ctor { + methods.extend(Some(quote! { + #[inline] + #access_spec fn #ctor_name ( #( #ctor_params ),* ) -> #unit_field_ty { + let mut __bindgen_bitfield_unit: #unit_field_ty = Default::default(); + #ctor_impl + __bindgen_bitfield_unit + } + })); + } + + struct_layout.saw_bitfield_unit(layout); + } +} + +fn bitfield_getter_name( + ctx: &BindgenContext, + bitfield: &Bitfield, +) -> proc_macro2::TokenStream { + let name = bitfield.getter_name(); + let name = ctx.rust_ident_raw(name); + quote! { #name } +} + +fn bitfield_setter_name( + ctx: &BindgenContext, + bitfield: &Bitfield, +) -> proc_macro2::TokenStream { + let setter = bitfield.setter_name(); + let setter = ctx.rust_ident_raw(setter); + quote! { #setter } +} + +impl<'a> FieldCodegen<'a> for Bitfield { + type Extra = (&'a str, &'a mut bool); + + fn codegen<F, M>( + &self, + ctx: &BindgenContext, + visibility_kind: FieldVisibilityKind, + _accessor_kind: FieldAccessorKind, + parent: &CompInfo, + _result: &mut CodegenResult, + struct_layout: &mut StructLayoutTracker, + _fields: &mut F, + methods: &mut M, + (unit_field_name, bitfield_representable_as_int): (&'a str, &mut bool), + ) where + F: Extend<proc_macro2::TokenStream>, + M: Extend<proc_macro2::TokenStream>, + { + let prefix = ctx.trait_prefix(); + let getter_name = bitfield_getter_name(ctx, self); + let setter_name = bitfield_setter_name(ctx, self); + let unit_field_ident = Ident::new(unit_field_name, Span::call_site()); + + let bitfield_ty_item = ctx.resolve_item(self.ty()); + let bitfield_ty = bitfield_ty_item.expect_type(); + + let bitfield_ty_layout = bitfield_ty + .layout(ctx) + .expect("Bitfield without layout?
Gah!"); + let bitfield_int_ty = + match helpers::integer_type(ctx, bitfield_ty_layout) { + Some(int_ty) => { + *bitfield_representable_as_int = true; + int_ty + } + None => { + *bitfield_representable_as_int = false; + return; + } + }; + + let bitfield_ty = + bitfield_ty.to_rust_ty_or_opaque(ctx, bitfield_ty_item); + + let offset = self.offset_into_unit(); + let width = self.width() as u8; + + let visibility_kind = compute_visibility( + ctx, + self.is_public(), + Some(self.annotations()), + visibility_kind, + ); + let access_spec = access_specifier(visibility_kind); + + if parent.is_union() && !struct_layout.is_rust_union() { + methods.extend(Some(quote! { + #[inline] + #access_spec fn #getter_name(&self) -> #bitfield_ty { + unsafe { + ::#prefix::mem::transmute( + self.#unit_field_ident.as_ref().get(#offset, #width) + as #bitfield_int_ty + ) + } + } + + #[inline] + #access_spec fn #setter_name(&mut self, val: #bitfield_ty) { + unsafe { + let val: #bitfield_int_ty = ::#prefix::mem::transmute(val); + self.#unit_field_ident.as_mut().set( + #offset, + #width, + val as u64 + ) + } + } + })); + } else { + methods.extend(Some(quote! { + #[inline] + #access_spec fn #getter_name(&self) -> #bitfield_ty { + unsafe { + ::#prefix::mem::transmute( + self.#unit_field_ident.get(#offset, #width) + as #bitfield_int_ty + ) + } + } + + #[inline] + #access_spec fn #setter_name(&mut self, val: #bitfield_ty) { + unsafe { + let val: #bitfield_int_ty = ::#prefix::mem::transmute(val); + self.#unit_field_ident.set( + #offset, + #width, + val as u64 + ) + } + } + })); + } + } +} + +impl CodeGenerator for CompInfo { + type Extra = Item; + type Return = (); + + fn codegen( + &self, + ctx: &BindgenContext, + result: &mut CodegenResult<'_>, + item: &Item, + ) { + debug!("<CompInfo as CodeGenerator>::codegen: item = {:?}", item); + debug_assert!(item.is_enabled_for_codegen(ctx)); + + // Don't output classes with template parameters that aren't types, and + // also don't output template specializations, neither total or partial. + if self.has_non_type_template_params() { + return; + } + + let ty = item.expect_type(); + let layout = ty.layout(ctx); + let mut packed = self.is_packed(ctx, layout.as_ref()); + + let canonical_name = item.canonical_name(ctx); + let canonical_ident = ctx.rust_ident(&canonical_name); + + // Generate the vtable from the method list if appropriate. + // + // TODO: I don't know how this could play with virtual methods that are + // not in the list of methods found by us, we'll see. Also, could the + // order of the vtable pointers vary? + // + // FIXME: Once we generate proper vtables, we need to codegen the + // vtable, but *not* generate a field for it in the case that + // HasVtable::has_vtable_ptr is false but HasVtable::has_vtable is true. + // + // Also, we need to generate the vtable in such a way it "inherits" from + // the parent too. + let is_opaque = item.is_opaque(ctx, &()); + let mut fields = vec![]; + let mut struct_layout = + StructLayoutTracker::new(ctx, self, ty, &canonical_name); + + if !is_opaque { + if item.has_vtable_ptr(ctx) { + let vtable = Vtable::new(item.id(), self); + vtable.codegen(ctx, result, item); + + let vtable_type = vtable + .try_to_rust_ty(ctx, &()) + .expect("vtable to Rust type conversion is infallible") + .to_ptr(true); + + fields.push(quote!
{ + pub vtable_: #vtable_type , + }); + + struct_layout.saw_vtable(); + } + + for base in self.base_members() { + if !base.requires_storage(ctx) { + continue; + } + + let inner_item = ctx.resolve_item(base.ty); + let mut inner = inner_item.to_rust_ty_or_opaque(ctx, &()); + inner.append_implicit_template_params(ctx, inner_item); + let field_name = ctx.rust_ident(&base.field_name); + + struct_layout.saw_base(inner_item.expect_type()); + + let visibility = match ( + base.is_public(), + ctx.options().respect_cxx_access_specs, + ) { + (true, true) => FieldVisibilityKind::Public, + (false, true) => FieldVisibilityKind::Private, + _ => ctx.options().default_visibility, + }; + + let access_spec = access_specifier(visibility); + fields.push(quote! { + #access_spec #field_name: #inner, + }); + } + } + + let mut methods = vec![]; + if !is_opaque { + let visibility = item + .annotations() + .visibility_kind() + .unwrap_or(ctx.options().default_visibility); + let struct_accessor_kind = item + .annotations() + .accessor_kind() + .unwrap_or(FieldAccessorKind::None); + for field in self.fields() { + field.codegen( + ctx, + visibility, + struct_accessor_kind, + self, + result, + &mut struct_layout, + &mut fields, + &mut methods, + (), + ); + } + // Check whether an explicit padding field is needed + // at the end. + if let Some(comp_layout) = layout { + fields.extend( + struct_layout + .add_tail_padding(&canonical_name, comp_layout), + ); + } + } + + if is_opaque { + // Opaque item should not have generated methods, fields. + debug_assert!(fields.is_empty()); + debug_assert!(methods.is_empty()); + } + + let is_union = self.kind() == CompKind::Union; + let layout = item.kind().expect_type().layout(ctx); + let zero_sized = item.is_zero_sized(ctx); + let forward_decl = self.is_forward_declaration(); + + let mut explicit_align = None; + + // C++ requires every struct to be addressable, so what C++ compilers do + // is making the struct 1-byte sized. + // + // This is apparently not the case for C, see: + // https://github.com/rust-lang/rust-bindgen/issues/551 + // + // Just get the layout, and assume C++ if not. + // + // NOTE: This check is conveniently here to avoid the dummy fields we + // may add for unused template parameters. + if !forward_decl && zero_sized { + let has_address = if is_opaque { + // Generate the address field if it's an opaque type and + // couldn't determine the layout of the blob. + layout.is_none() + } else { + layout.map_or(true, |l| l.size != 0) + }; + + if has_address { + let layout = Layout::new(1, 1); + let ty = helpers::blob(ctx, Layout::new(1, 1)); + struct_layout.saw_field_with_layout( + "_address", + layout, + /* offset = */ Some(0), + ); + fields.push(quote! { + pub _address: #ty, + }); + } + } + + if is_opaque { + match layout { + Some(l) => { + explicit_align = Some(l.align); + + let ty = helpers::blob(ctx, l); + fields.push(quote! { + pub _bindgen_opaque_blob: #ty , + }); + } + None => { + warn!("Opaque type without layout! Expect dragons!"); + } + } + } else if !is_union && !zero_sized { + if let Some(padding_field) = + layout.and_then(|layout| struct_layout.pad_struct(layout)) + { + fields.push(padding_field); + } + + if let Some(layout) = layout { + if struct_layout.requires_explicit_align(layout) { + if layout.align == 1 { + packed = true; + } else { + explicit_align = Some(layout.align); + if !ctx.options().rust_features.repr_align { + let ty = helpers::blob( + ctx, + Layout::new(0, layout.align), + ); + fields.push(quote! 
{ + pub __bindgen_align: #ty , + }); + } + } + } + } + } else if is_union && !forward_decl { + // TODO(emilio): It'd be nice to unify this with the struct path + // above somehow. + let layout = layout.expect("Unable to get layout information?"); + if struct_layout.requires_explicit_align(layout) { + explicit_align = Some(layout.align); + } + + if !struct_layout.is_rust_union() { + let ty = helpers::blob(ctx, layout); + fields.push(quote! { + pub bindgen_union_field: #ty , + }) + } + } + + if forward_decl { + fields.push(quote! { + _unused: [u8; 0], + }); + } + + let mut generic_param_names = vec![]; + + for (idx, ty) in item.used_template_params(ctx).iter().enumerate() { + let param = ctx.resolve_type(*ty); + let name = param.name().unwrap(); + let ident = ctx.rust_ident(name); + generic_param_names.push(ident.clone()); + + let prefix = ctx.trait_prefix(); + let field_name = ctx.rust_ident(format!("_phantom_{}", idx)); + fields.push(quote! { + pub #field_name : ::#prefix::marker::PhantomData< + ::#prefix::cell::UnsafeCell<#ident> + > , + }); + } + + let generics = if !generic_param_names.is_empty() { + let generic_param_names = generic_param_names.clone(); + quote! { + < #( #generic_param_names ),* > + } + } else { + quote! {} + }; + + let mut attributes = vec![]; + let mut needs_clone_impl = false; + let mut needs_default_impl = false; + let mut needs_debug_impl = false; + let mut needs_partialeq_impl = false; + if let Some(comment) = item.comment(ctx) { + attributes.push(attributes::doc(comment)); + } + if packed && !is_opaque { + let n = layout.map_or(1, |l| l.align); + assert!(ctx.options().rust_features().repr_packed_n || n == 1); + let packed_repr = if n == 1 { + "packed".to_string() + } else { + format!("packed({})", n) + }; + attributes.push(attributes::repr_list(&["C", &packed_repr])); + } else { + attributes.push(attributes::repr("C")); + } + + if ctx.options().rust_features().repr_align { + if let Some(explicit) = explicit_align { + // Ensure that the struct has the correct alignment even in + // presence of alignas. + let explicit = helpers::ast_ty::int_expr(explicit as i64); + attributes.push(quote! { + #[repr(align(#explicit))] + }); + } + } + + let derivable_traits = derives_of_item(item, ctx, packed); + if !derivable_traits.contains(DerivableTraits::DEBUG) { + needs_debug_impl = ctx.options().derive_debug && + ctx.options().impl_debug && + !ctx.no_debug_by_name(item) && + !item.annotations().disallow_debug(); + } + + if !derivable_traits.contains(DerivableTraits::DEFAULT) { + needs_default_impl = ctx.options().derive_default && + !self.is_forward_declaration() && + !ctx.no_default_by_name(item) && + !item.annotations().disallow_default(); + } + + let all_template_params = item.all_template_params(ctx); + + if derivable_traits.contains(DerivableTraits::COPY) && + !derivable_traits.contains(DerivableTraits::CLONE) + { + needs_clone_impl = true; + } + + if !derivable_traits.contains(DerivableTraits::PARTIAL_EQ) { + needs_partialeq_impl = ctx.options().derive_partialeq && + ctx.options().impl_partialeq && + ctx.lookup_can_derive_partialeq_or_partialord(item.id()) == + CanDerive::Manually; + } + + let mut derives: Vec<_> = derivable_traits.into(); + derives.extend(item.annotations().derives().iter().map(String::as_str)); + + let is_rust_union = is_union && struct_layout.is_rust_union(); + + // The custom derives callback may return a list of derive attributes; + // add them to the end of the list. 
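// [Editor's aside, not part of the diff] Downstream users feed the custom-derives
// hook below through bindgen's public callback API; a minimal sketch, assuming the
// ParseCallbacks / DeriveInfo interface of this bindgen vintage and a hypothetical
// C type name:
#[derive(Debug)]
struct ExtraDerives;
impl bindgen::callbacks::ParseCallbacks for ExtraDerives {
    fn add_derives(&self, info: &bindgen::callbacks::DeriveInfo<'_>) -> Vec<String> {
        if info.name == "my_config_t" {
            vec!["serde::Serialize".into()] // appended after the built-in derives
        } else {
            vec![]
        }
    }
}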
+ let custom_derives = ctx.options().all_callbacks(|cb| { + cb.add_derives(&DeriveInfo { + name: &canonical_name, + kind: if is_rust_union { + DeriveTypeKind::Union + } else { + DeriveTypeKind::Struct + }, + }) + }); + // In most cases this will be a no-op, since custom_derives will be empty. + derives.extend(custom_derives.iter().map(|s| s.as_str())); + + if !derives.is_empty() { + attributes.push(attributes::derives(&derives)) + } + + if item.must_use(ctx) { + attributes.push(attributes::must_use()); + } + + let mut tokens = if is_rust_union { + quote! { + #( #attributes )* + pub union #canonical_ident + } + } else { + quote! { + #( #attributes )* + pub struct #canonical_ident + } + }; + + tokens.append_all(quote! { + #generics { + #( #fields )* + } + }); + result.push(tokens); + + // Generate the inner types and all that stuff. + // + // TODO: In the future we might want to be smart, and use nested + // modules, and whatnot. + for ty in self.inner_types() { + let child_item = ctx.resolve_item(*ty); + // assert_eq!(child_item.parent_id(), item.id()); + child_item.codegen(ctx, result, &()); + } + + // NOTE: Some unexposed attributes (like alignment attributes) may + // affect layout, so we're bad and pray to the gods for avoid sending + // all the tests to shit when parsing things like max_align_t. + if self.found_unknown_attr() { + warn!( + "Type {} has an unknown attribute that may affect layout", + canonical_ident + ); + } + + if all_template_params.is_empty() { + if !is_opaque { + for var in self.inner_vars() { + ctx.resolve_item(*var).codegen(ctx, result, &()); + } + } + + if ctx.options().layout_tests && !self.is_forward_declaration() { + if let Some(layout) = layout { + let fn_name = + format!("bindgen_test_layout_{}", canonical_ident); + let fn_name = ctx.rust_ident_raw(fn_name); + let prefix = ctx.trait_prefix(); + let size_of_expr = quote! { + ::#prefix::mem::size_of::<#canonical_ident>() + }; + let align_of_expr = quote! { + ::#prefix::mem::align_of::<#canonical_ident>() + }; + let size = layout.size; + let align = layout.align; + + let check_struct_align = if align > + ctx.target_pointer_size() && + !ctx.options().rust_features().repr_align + { + None + } else { + Some(quote! { + assert_eq!(#align_of_expr, + #align, + concat!("Alignment of ", stringify!(#canonical_ident))); + + }) + }; + + let should_skip_field_offset_checks = is_opaque; + + let check_field_offset = if should_skip_field_offset_checks + { + vec![] + } else { + self.fields() + .iter() + .filter_map(|field| match *field { + Field::DataMember(ref f) if f.name().is_some() => Some(f), + _ => None, + }) + .flat_map(|field| { + let name = field.name().unwrap(); + field.offset().map(|offset| { + let field_offset = offset / 8; + let field_name = ctx.rust_ident(name); + quote! { + assert_eq!( + unsafe { + ::#prefix::ptr::addr_of!((*ptr).#field_name) as usize - ptr as usize + }, + #field_offset, + concat!("Offset of field: ", stringify!(#canonical_ident), "::", stringify!(#field_name)) + ); + } + }) + }) + .collect() + }; + + let uninit_decl = if !check_field_offset.is_empty() { + // FIXME: When MSRV >= 1.59.0, we can use + // > const PTR: *const #canonical_ident = ::#prefix::mem::MaybeUninit::uninit().as_ptr(); + Some(quote! { + // Use a shared MaybeUninit so that rustc with + // opt-level=0 doesn't take too much stack space, + // see #2218. + const UNINIT: ::#prefix::mem::MaybeUninit<#canonical_ident> = ::#prefix::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + }) + } else { + None + }; + + let item = quote! 
{ + #[test] + fn #fn_name() { + #uninit_decl + assert_eq!(#size_of_expr, + #size, + concat!("Size of: ", stringify!(#canonical_ident))); + #check_struct_align + #( #check_field_offset )* + } + }; + result.push(item); + } + } + + let mut method_names = Default::default(); + if ctx.options().codegen_config.methods() { + for method in self.methods() { + assert!(method.kind() != MethodKind::Constructor); + method.codegen_method( + ctx, + &mut methods, + &mut method_names, + result, + self, + ); + } + } + + if ctx.options().codegen_config.constructors() { + for sig in self.constructors() { + Method::new( + MethodKind::Constructor, + *sig, + /* const */ + false, + ) + .codegen_method( + ctx, + &mut methods, + &mut method_names, + result, + self, + ); + } + } + + if ctx.options().codegen_config.destructors() { + if let Some((kind, destructor)) = self.destructor() { + debug_assert!(kind.is_destructor()); + Method::new(kind, destructor, false).codegen_method( + ctx, + &mut methods, + &mut method_names, + result, + self, + ); + } + } + } + + // NB: We can't use to_rust_ty here since for opaque types this tries to + // use the specialization knowledge to generate a blob field. + let ty_for_impl = quote! { + #canonical_ident #generics + }; + + if needs_clone_impl { + result.push(quote! { + impl #generics Clone for #ty_for_impl { + fn clone(&self) -> Self { *self } + } + }); + } + + if needs_default_impl { + let prefix = ctx.trait_prefix(); + let body = if ctx.options().rust_features().maybe_uninit { + quote! { + let mut s = ::#prefix::mem::MaybeUninit::<Self>::uninit(); + unsafe { + ::#prefix::ptr::write_bytes(s.as_mut_ptr(), 0, 1); + s.assume_init() + } + } + } else { + quote! { + unsafe { + let mut s: Self = ::#prefix::mem::uninitialized(); + ::#prefix::ptr::write_bytes(&mut s, 0, 1); + s + } + } + }; + // Note we use `ptr::write_bytes()` instead of `mem::zeroed()` because the latter does + // not necessarily ensure padding bytes are zeroed. Some C libraries are sensitive to + // non-zero padding bytes, especially when forwards/backwards compatability is + // involved. + result.push(quote! { + impl #generics Default for #ty_for_impl { + fn default() -> Self { + #body + } + } + }); + } + + if needs_debug_impl { + let impl_ = impl_debug::gen_debug_impl( + ctx, + self.fields(), + item, + self.kind(), + ); + + let prefix = ctx.trait_prefix(); + + result.push(quote! { + impl #generics ::#prefix::fmt::Debug for #ty_for_impl { + #impl_ + } + }); + } + + if needs_partialeq_impl { + if let Some(impl_) = impl_partialeq::gen_partialeq_impl( + ctx, + self, + item, + &ty_for_impl, + ) { + let partialeq_bounds = if !generic_param_names.is_empty() { + let bounds = generic_param_names.iter().map(|t| { + quote! { #t: PartialEq } + }); + quote! { where #( #bounds ),* } + } else { + quote! {} + }; + + let prefix = ctx.trait_prefix(); + result.push(quote! { + impl #generics ::#prefix::cmp::PartialEq for #ty_for_impl #partialeq_bounds { + #impl_ + } + }); + } + } + + if !methods.is_empty() { + result.push(quote! { + impl #generics #ty_for_impl { + #( #methods )* + } + }); + } + } +} + +impl Method { + fn codegen_method( + &self, + ctx: &BindgenContext, + methods: &mut Vec<proc_macro2::TokenStream>, + method_names: &mut HashSet<String>, + result: &mut CodegenResult<'_>, + _parent: &CompInfo, + ) { + assert!({ + let cc = &ctx.options().codegen_config; + match self.kind() { + MethodKind::Constructor => cc.constructors(), + MethodKind::Destructor => cc.destructors(), + MethodKind::VirtualDestructor { ..
+                MethodKind::Static |
+                MethodKind::Normal |
+                MethodKind::Virtual { .. } => cc.methods(),
+            }
+        });
+
+        // TODO(emilio): We could generate final stuff at least.
+        if self.is_virtual() {
+            return; // FIXME
+        }
+
+        // First of all, output the actual function.
+        let function_item = ctx.resolve_item(self.signature());
+        if !function_item.process_before_codegen(ctx, result) {
+            return;
+        }
+        let function = function_item.expect_function();
+        let times_seen = function.codegen(ctx, result, function_item);
+        let times_seen = match times_seen {
+            Some(seen) => seen,
+            None => return,
+        };
+        let signature_item = ctx.resolve_item(function.signature());
+        let mut name = match self.kind() {
+            MethodKind::Constructor => "new".into(),
+            MethodKind::Destructor => "destruct".into(),
+            _ => function.name().to_owned(),
+        };
+
+        let signature = match *signature_item.expect_type().kind() {
+            TypeKind::Function(ref sig) => sig,
+            _ => panic!("How in the world?"),
+        };
+
+        let supported_abi = match signature.abi(ctx, Some(&*name)) {
+            ClangAbi::Known(Abi::ThisCall) => {
+                ctx.options().rust_features().thiscall_abi
+            }
+            ClangAbi::Known(Abi::Vectorcall) => {
+                ctx.options().rust_features().vectorcall_abi
+            }
+            ClangAbi::Known(Abi::CUnwind) => {
+                ctx.options().rust_features().c_unwind_abi
+            }
+            ClangAbi::Known(Abi::EfiApi) => {
+                ctx.options().rust_features().abi_efiapi
+            }
+            _ => true,
+        };
+
+        if !supported_abi {
+            return;
+        }
+
+        // Do not generate variadic methods, since rust does not allow
+        // implementing them, and we don't do a good job at it anyway.
+        if signature.is_variadic() {
+            return;
+        }
+
+        if method_names.contains(&name) {
+            let mut count = 1;
+            let mut new_name;
+
+            while {
+                new_name = format!("{}{}", name, count);
+                method_names.contains(&new_name)
+            } {
+                count += 1;
+            }
+
+            name = new_name;
+        }
+
+        method_names.insert(name.clone());
+
+        let mut function_name = function_item.canonical_name(ctx);
+        if times_seen > 0 {
+            write!(&mut function_name, "{}", times_seen).unwrap();
+        }
+        let function_name = ctx.rust_ident(function_name);
+        let mut args = utils::fnsig_arguments(ctx, signature);
+        let mut ret = utils::fnsig_return_ty(ctx, signature);
+
+        if !self.is_static() && !self.is_constructor() {
+            args[0] = if self.is_const() {
+                quote! { &self }
+            } else {
+                quote! { &mut self }
+            };
+        }
+
+        // If it's a constructor, we always return `Self`, and we inject the
+        // "this" parameter, so there's no need to ask the user for it.
+        //
+        // Note that constructors in Clang are represented as functions with
+        // return-type = void.
+        if self.is_constructor() {
+            args.remove(0);
+            ret = quote! { -> Self };
+        }
+
+        let mut exprs =
+            helpers::ast_ty::arguments_from_signature(signature, ctx);
+
+        let mut stmts = vec![];
+
+        // If it's a constructor, we need to insert an extra parameter with a
+        // variable called `__bindgen_tmp` we're going to create.
+        if self.is_constructor() {
+            let prefix = ctx.trait_prefix();
+            let tmp_variable_decl = if ctx
+                .options()
+                .rust_features()
+                .maybe_uninit
+            {
+                exprs[0] = quote! {
+                    __bindgen_tmp.as_mut_ptr()
+                };
+                quote! {
+                    let mut __bindgen_tmp = ::#prefix::mem::MaybeUninit::uninit()
+                }
+            } else {
+                exprs[0] = quote! {
+                    &mut __bindgen_tmp
+                };
+                quote! {
+                    let mut __bindgen_tmp = ::#prefix::mem::uninitialized()
+                }
+            };
+            stmts.push(tmp_variable_decl);
+        } else if !self.is_static() {
+            assert!(!exprs.is_empty());
+            exprs[0] = quote! {
+                self
+            };
+        };
+
+        let call = quote! {
+            #function_name (#( #exprs ),* )
+        };
+
+        stmts.push(call);
+
+        if self.is_constructor() {
+            stmts.push(if ctx.options().rust_features().maybe_uninit {
+                quote! {
+                    __bindgen_tmp.assume_init()
+                }
+            } else {
+                quote! {
+                    __bindgen_tmp
+                }
+            })
+        }
+
+        let block = ctx.wrap_unsafe_ops(quote! ( #( #stmts );*));
+
+        let mut attrs = vec![attributes::inline()];
+
+        if signature.must_use() &&
+            ctx.options().rust_features().must_use_function
+        {
+            attrs.push(attributes::must_use());
+        }
+
+        let name = ctx.rust_ident(&name);
+        methods.push(quote! {
+            #(#attrs)*
+            pub unsafe fn #name ( #( #args ),* ) #ret {
+                #block
+            }
+        });
+    }
+}
+
+/// A helper type that represents different enum variations.
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub enum EnumVariation {
+    /// The code for this enum will use a Rust enum. Note that creating this in
+    /// unsafe code (including FFI) with an invalid value will invoke undefined
+    /// behaviour, whether or not it's marked as non_exhaustive.
+    Rust {
+        /// Indicates whether the generated struct should be `#[non_exhaustive]`
+        non_exhaustive: bool,
+    },
+    /// The code for this enum will use a newtype
+    NewType {
+        /// Indicates whether the newtype will have bitwise operators
+        is_bitfield: bool,
+        /// Indicates whether the variants will be represented as global constants
+        is_global: bool,
+    },
+    /// The code for this enum will use consts
+    Consts,
+    /// The code for this enum will use a module containing consts
+    ModuleConsts,
+}
+
+impl EnumVariation {
+    fn is_rust(&self) -> bool {
+        matches!(*self, EnumVariation::Rust { .. })
+    }
+
+    /// Both the `Const` and `ModuleConsts` variants will cause this to return
+    /// true.
+    fn is_const(&self) -> bool {
+        matches!(*self, EnumVariation::Consts | EnumVariation::ModuleConsts)
+    }
+}
+
+impl Default for EnumVariation {
+    fn default() -> EnumVariation {
+        EnumVariation::Consts
+    }
+}
+
+impl fmt::Display for EnumVariation {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let s = match self {
+            Self::Rust {
+                non_exhaustive: false,
+            } => "rust",
+            Self::Rust {
+                non_exhaustive: true,
+            } => "rust_non_exhaustive",
+            Self::NewType {
+                is_bitfield: true, ..
+            } => "bitfield",
+            Self::NewType {
+                is_bitfield: false,
+                is_global,
+            } => {
+                if *is_global {
+                    "newtype_global"
+                } else {
+                    "newtype"
+                }
+            }
+            Self::Consts => "consts",
+            Self::ModuleConsts => "moduleconsts",
+        };
+        s.fmt(f)
+    }
+}
+
+impl std::str::FromStr for EnumVariation {
+    type Err = std::io::Error;
+
+    /// Create an `EnumVariation` from a string.
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s {
+            "rust" => Ok(EnumVariation::Rust {
+                non_exhaustive: false,
+            }),
+            "rust_non_exhaustive" => Ok(EnumVariation::Rust {
+                non_exhaustive: true,
+            }),
+            "bitfield" => Ok(EnumVariation::NewType {
+                is_bitfield: true,
+                is_global: false,
+            }),
+            "consts" => Ok(EnumVariation::Consts),
+            "moduleconsts" => Ok(EnumVariation::ModuleConsts),
+            "newtype" => Ok(EnumVariation::NewType {
+                is_bitfield: false,
+                is_global: false,
+            }),
+            "newtype_global" => Ok(EnumVariation::NewType {
+                is_bitfield: false,
+                is_global: true,
+            }),
+            _ => Err(std::io::Error::new(
+                std::io::ErrorKind::InvalidInput,
+                concat!(
+                    "Got an invalid EnumVariation. Accepted values ",
+                    "are 'rust', 'rust_non_exhaustive', 'bitfield', 'consts',",
+                    "'moduleconsts', 'newtype' and 'newtype_global'."
+                ),
+            )),
+        }
+    }
+}
+
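+// For orientation, an illustrative sketch (hypothetical C input
+// `enum mode { MODE_A, MODE_B };`): the variations above roughly map to
+//
+//     "rust"         => #[repr(u32)] pub enum mode { MODE_A = 0, MODE_B = 1 }
+//     "newtype"      => #[repr(transparent)] pub struct mode(pub u32);
+//                       impl mode { pub const MODE_A: mode = mode(0); /* ... */ }
+//     "consts"       => pub type mode = u32;
+//                       pub const mode_MODE_A: mode = 0; /* ... */
+//     "moduleconsts" => pub mod mode { pub type Type = u32; pub const MODE_A: Type = 0; /* ... */ }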
+/// A helper type to construct different enum variations.
+enum EnumBuilder<'a> {
+    Rust {
+        attrs: Vec<proc_macro2::TokenStream>,
+        ident: Ident,
+        tokens: proc_macro2::TokenStream,
+        emitted_any_variants: bool,
+    },
+    NewType {
+        canonical_name: &'a str,
+        tokens: proc_macro2::TokenStream,
+        is_bitfield: bool,
+        is_global: bool,
+    },
+    Consts {
+        variants: Vec<proc_macro2::TokenStream>,
+    },
+    ModuleConsts {
+        module_name: &'a str,
+        module_items: Vec<proc_macro2::TokenStream>,
+    },
+}
+
+impl<'a> EnumBuilder<'a> {
+    /// Returns true if the builder is for a rustified enum.
+    fn is_rust_enum(&self) -> bool {
+        matches!(*self, EnumBuilder::Rust { .. })
+    }
+
+    /// Create a new enum given an item builder, a canonical name, a name for
+    /// the representation, and which variation it should be generated as.
+    fn new(
+        name: &'a str,
+        mut attrs: Vec<proc_macro2::TokenStream>,
+        repr: proc_macro2::TokenStream,
+        enum_variation: EnumVariation,
+        has_typedef: bool,
+    ) -> Self {
+        let ident = Ident::new(name, Span::call_site());
+
+        match enum_variation {
+            EnumVariation::NewType {
+                is_bitfield,
+                is_global,
+            } => EnumBuilder::NewType {
+                canonical_name: name,
+                tokens: quote! {
+                    #( #attrs )*
+                    pub struct #ident (pub #repr);
+                },
+                is_bitfield,
+                is_global,
+            },
+
+            EnumVariation::Rust { .. } => {
+                // `repr` is guaranteed to be Rustified in Enum::codegen
+                attrs.insert(0, quote! { #[repr( #repr )] });
+                let tokens = quote!();
+                EnumBuilder::Rust {
+                    attrs,
+                    ident,
+                    tokens,
+                    emitted_any_variants: false,
+                }
+            }
+
+            EnumVariation::Consts => {
+                let mut variants = Vec::new();
+
+                if !has_typedef {
+                    variants.push(quote! {
+                        #( #attrs )*
+                        pub type #ident = #repr;
+                    });
+                }
+
+                EnumBuilder::Consts { variants }
+            }
+
+            EnumVariation::ModuleConsts => {
+                let ident = Ident::new(
+                    CONSTIFIED_ENUM_MODULE_REPR_NAME,
+                    Span::call_site(),
+                );
+                let type_definition = quote! {
+                    #( #attrs )*
+                    pub type #ident = #repr;
+                };
+
+                EnumBuilder::ModuleConsts {
+                    module_name: name,
+                    module_items: vec![type_definition],
+                }
+            }
+        }
+    }
+
+    /// Add a variant to this enum.
+    fn with_variant(
+        self,
+        ctx: &BindgenContext,
+        variant: &EnumVariant,
+        mangling_prefix: Option<&str>,
+        rust_ty: proc_macro2::TokenStream,
+        result: &mut CodegenResult<'_>,
+        is_ty_named: bool,
+    ) -> Self {
+        let variant_name = ctx.rust_mangle(variant.name());
+        let is_rust_enum = self.is_rust_enum();
+        let expr = match variant.val() {
+            EnumVariantValue::Boolean(v) if is_rust_enum => {
+                helpers::ast_ty::uint_expr(v as u64)
+            }
+            EnumVariantValue::Boolean(v) => quote!(#v),
+            EnumVariantValue::Signed(v) => helpers::ast_ty::int_expr(v),
+            EnumVariantValue::Unsigned(v) => helpers::ast_ty::uint_expr(v),
+        };
+
+        let mut doc = quote! {};
+        if ctx.options().generate_comments {
+            if let Some(raw_comment) = variant.comment() {
+                let comment = ctx.options().process_comment(raw_comment);
+                doc = attributes::doc(comment);
+            }
+        }
+
+        match self {
+            EnumBuilder::Rust {
+                attrs,
+                ident,
+                tokens,
+                emitted_any_variants: _,
+            } => {
+                let name = ctx.rust_ident(variant_name);
+                EnumBuilder::Rust {
+                    attrs,
+                    ident,
+                    tokens: quote! {
+                        #tokens
+                        #doc
+                        #name = #expr,
+                    },
+                    emitted_any_variants: true,
+                }
+            }
+
+            EnumBuilder::NewType {
+                canonical_name,
+                is_global,
+                ..
+            } => {
+                if ctx.options().rust_features().associated_const &&
+                    is_ty_named &&
+                    !is_global
+                {
+                    let enum_ident = ctx.rust_ident(canonical_name);
+                    let variant_ident = ctx.rust_ident(variant_name);
+
+                    result.push(quote! {
+                        impl #enum_ident {
+                            #doc
+                            pub const #variant_ident : #rust_ty = #rust_ty ( #expr );
+                        }
+                    });
+                } else {
+                    let ident = ctx.rust_ident(match mangling_prefix {
+                        Some(prefix) => {
+                            Cow::Owned(format!("{}_{}", prefix, variant_name))
+                        }
+                        None => variant_name,
+                    });
+                    result.push(quote! {
+                        #doc
+                        pub const #ident : #rust_ty = #rust_ty ( #expr );
+                    });
+                }
+
+                self
+            }
+
+            EnumBuilder::Consts { .. } => {
+                let constant_name = match mangling_prefix {
+                    Some(prefix) => {
+                        Cow::Owned(format!("{}_{}", prefix, variant_name))
+                    }
+                    None => variant_name,
+                };
+
+                let ident = ctx.rust_ident(constant_name);
+                result.push(quote! {
+                    #doc
+                    pub const #ident : #rust_ty = #expr ;
+                });
+
+                self
+            }
+            EnumBuilder::ModuleConsts {
+                module_name,
+                mut module_items,
+            } => {
+                let name = ctx.rust_ident(variant_name);
+                let ty = ctx.rust_ident(CONSTIFIED_ENUM_MODULE_REPR_NAME);
+                module_items.push(quote! {
+                    #doc
+                    pub const #name : #ty = #expr ;
+                });
+
+                EnumBuilder::ModuleConsts {
+                    module_name,
+                    module_items,
+                }
+            }
+        }
+    }
+
+    fn build(
+        self,
+        ctx: &BindgenContext,
+        rust_ty: proc_macro2::TokenStream,
+        result: &mut CodegenResult<'_>,
+    ) -> proc_macro2::TokenStream {
+        match self {
+            EnumBuilder::Rust {
+                attrs,
+                ident,
+                tokens,
+                emitted_any_variants,
+                ..
+            } => {
+                let variants = if !emitted_any_variants {
+                    quote!(__bindgen_cannot_repr_c_on_empty_enum = 0)
+                } else {
+                    tokens
+                };
+
+                quote! {
+                    #( #attrs )*
+                    pub enum #ident {
+                        #variants
+                    }
+                }
+            }
+            EnumBuilder::NewType {
+                canonical_name,
+                tokens,
+                is_bitfield,
+                ..
+            } => {
+                if !is_bitfield {
+                    return tokens;
+                }
+
+                let rust_ty_name = ctx.rust_ident_raw(canonical_name);
+                let prefix = ctx.trait_prefix();
+
+                result.push(quote! {
+                    impl ::#prefix::ops::BitOr<#rust_ty> for #rust_ty {
+                        type Output = Self;
+
+                        #[inline]
+                        fn bitor(self, other: Self) -> Self {
+                            #rust_ty_name(self.0 | other.0)
+                        }
+                    }
+                });
+
+                result.push(quote! {
+                    impl ::#prefix::ops::BitOrAssign for #rust_ty {
+                        #[inline]
+                        fn bitor_assign(&mut self, rhs: #rust_ty) {
+                            self.0 |= rhs.0;
+                        }
+                    }
+                });
+
+                result.push(quote! {
+                    impl ::#prefix::ops::BitAnd<#rust_ty> for #rust_ty {
+                        type Output = Self;
+
+                        #[inline]
+                        fn bitand(self, other: Self) -> Self {
+                            #rust_ty_name(self.0 & other.0)
+                        }
+                    }
+                });
+
+                result.push(quote! {
+                    impl ::#prefix::ops::BitAndAssign for #rust_ty {
+                        #[inline]
+                        fn bitand_assign(&mut self, rhs: #rust_ty) {
+                            self.0 &= rhs.0;
+                        }
+                    }
+                });
+
+                tokens
+            }
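+            // Usage sketch (illustrative, for a hypothetical bitfield enum
+            // `Flags` generated through the NewType { is_bitfield: true, .. }
+            // path): the operator impls pushed above let the newtype behave
+            // like a C bitmask:
+            //
+            //     let rw = Flags::READ | Flags::WRITE;  // BitOr
+            //     let mut f = rw;
+            //     f &= Flags::READ;                     // BitAndAssign
+            //     assert_eq!(f, Flags::READ);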
+            EnumBuilder::Consts { variants, .. } => quote! { #( #variants )* },
+            EnumBuilder::ModuleConsts {
+                module_items,
+                module_name,
+                ..
+            } => {
+                let ident = ctx.rust_ident(module_name);
+                quote! {
+                    pub mod #ident {
+                        #( #module_items )*
+                    }
+                }
+            }
+        }
+    }
+}
+
+impl CodeGenerator for Enum {
+    type Extra = Item;
+    type Return = ();
+
+    fn codegen(
+        &self,
+        ctx: &BindgenContext,
+        result: &mut CodegenResult<'_>,
+        item: &Item,
+    ) {
+        debug!("<Enum as CodeGenerator>::codegen: item = {:?}", item);
+        debug_assert!(item.is_enabled_for_codegen(ctx));
+
+        let name = item.canonical_name(ctx);
+        let ident = ctx.rust_ident(&name);
+        let enum_ty = item.expect_type();
+        let layout = enum_ty.layout(ctx);
+        let variation = self.computed_enum_variation(ctx, item);
+
+        let repr_translated;
+        let repr = match self.repr().map(|repr| ctx.resolve_type(repr)) {
+            Some(repr)
+                if !ctx.options().translate_enum_integer_types &&
+                    !variation.is_rust() =>
+            {
+                repr
+            }
+            repr => {
+                // An enum's integer type is translated to a native Rust
+                // integer type in 3 cases:
+                // * the enum is Rustified and we need a translated type for
+                //   the repr attribute
+                // * the representation couldn't be determined from the C source
+                // * it was explicitly requested as a bindgen option
+
+                let kind = match repr {
+                    Some(repr) => match *repr.canonical_type(ctx).kind() {
+                        TypeKind::Int(int_kind) => int_kind,
+                        _ => panic!("Unexpected type as enum repr"),
+                    },
+                    None => {
+                        warn!(
+                            "Guessing type of enum! Forward declarations of enums \
+                             shouldn't be legal!"
+                        );
+                        IntKind::Int
+                    }
+                };
+
+                let signed = kind.is_signed();
+                let size = layout
+                    .map(|l| l.size)
+                    .or_else(|| kind.known_size())
+                    .unwrap_or(0);
+
+                let translated = match (signed, size) {
+                    (true, 1) => IntKind::I8,
+                    (false, 1) => IntKind::U8,
+                    (true, 2) => IntKind::I16,
+                    (false, 2) => IntKind::U16,
+                    (true, 4) => IntKind::I32,
+                    (false, 4) => IntKind::U32,
+                    (true, 8) => IntKind::I64,
+                    (false, 8) => IntKind::U64,
+                    _ => {
+                        warn!(
+                            "invalid enum decl: signed: {}, size: {}",
+                            signed, size
+                        );
+                        IntKind::I32
+                    }
+                };
+
+                repr_translated =
+                    Type::new(None, None, TypeKind::Int(translated), false);
+                &repr_translated
+            }
+        };
+
+        let mut attrs = vec![];
+
+        // TODO(emilio): Delegate this to the builders?
+        match variation {
+            EnumVariation::Rust { non_exhaustive } => {
+                if non_exhaustive &&
+                    ctx.options().rust_features().non_exhaustive
+                {
+                    attrs.push(attributes::non_exhaustive());
+                } else if non_exhaustive &&
+                    !ctx.options().rust_features().non_exhaustive
+                {
+                    panic!("The rust target you're using doesn't seem to support non_exhaustive enums");
+                }
+            }
+            EnumVariation::NewType { .. } => {
+                if ctx.options().rust_features.repr_transparent {
+                    attrs.push(attributes::repr("transparent"));
+                } else {
+                    attrs.push(attributes::repr("C"));
+                }
+            }
+            _ => {}
+        };
+
+        if let Some(comment) = item.comment(ctx) {
+            attrs.push(attributes::doc(comment));
+        }
+
+        if item.must_use(ctx) {
+            attrs.push(attributes::must_use());
+        }
+
+        if !variation.is_const() {
+            let packed = false; // Enums can't be packed in Rust.
+            let mut derives = derives_of_item(item, ctx, packed);
+            // For backwards compat, enums always derive
+            // Clone/Eq/PartialEq/Hash, even if we don't generate those by
+            // default.
+            derives.insert(
+                DerivableTraits::CLONE |
+                    DerivableTraits::HASH |
+                    DerivableTraits::PARTIAL_EQ |
+                    DerivableTraits::EQ,
+            );
+            let mut derives: Vec<_> = derives.into();
+            for derive in item.annotations().derives().iter() {
+                if !derives.contains(&derive.as_str()) {
+                    derives.push(derive);
+                }
+            }
+
+            // The custom derives callback may return a list of derive attributes;
+            // add them to the end of the list.
+            let custom_derives = ctx.options().all_callbacks(|cb| {
+                cb.add_derives(&DeriveInfo {
+                    name: &name,
+                    kind: DeriveTypeKind::Enum,
+                })
+            });
+            // In most cases this will be a no-op, since custom_derives will be empty.
+            derives.extend(custom_derives.iter().map(|s| s.as_str()));
+
+            attrs.push(attributes::derives(&derives));
+        }
+
+        fn add_constant(
+            ctx: &BindgenContext,
+            enum_: &Type,
+            // Only to avoid recomputing every time.
+            enum_canonical_name: &Ident,
+            // May be the same as "variant" if it's because the
+            // enum is unnamed and we still haven't seen the
+            // value.
+            variant_name: &Ident,
+            referenced_name: &Ident,
+            enum_rust_ty: proc_macro2::TokenStream,
+            result: &mut CodegenResult<'_>,
+        ) {
+            let constant_name = if enum_.name().is_some() {
+                if ctx.options().prepend_enum_name {
+                    format!("{}_{}", enum_canonical_name, variant_name)
+                } else {
+                    format!("{}", variant_name)
+                }
+            } else {
+                format!("{}", variant_name)
+            };
+            let constant_name = ctx.rust_ident(constant_name);
+
+            result.push(quote! {
+                pub const #constant_name : #enum_rust_ty =
+                    #enum_canonical_name :: #referenced_name ;
+            });
+        }
+
+        let repr = repr.to_rust_ty_or_opaque(ctx, item);
+        let has_typedef = ctx.is_enum_typedef_combo(item.id());
+
+        let mut builder =
+            EnumBuilder::new(&name, attrs, repr, variation, has_typedef);
+
+        // A map where we keep a value -> variant relation.
+        let mut seen_values = HashMap::<_, Ident>::default();
+        let enum_rust_ty = item.to_rust_ty_or_opaque(ctx, &());
+        let is_toplevel = item.is_toplevel(ctx);
+
+        // Used to mangle the constants we generate in the unnamed-enum case.
+        let parent_canonical_name = if is_toplevel {
+            None
+        } else {
+            Some(item.parent_id().canonical_name(ctx))
+        };
+
+        let constant_mangling_prefix = if ctx.options().prepend_enum_name {
+            if enum_ty.name().is_none() {
+                parent_canonical_name.as_deref()
+            } else {
+                Some(&*name)
+            }
+        } else {
+            None
+        };
+
+        // NB: We defer the creation of constified variants, in case we find
+        // another variant with the same value (which is the common thing to
+        // do).
+        let mut constified_variants = VecDeque::new();
+
+        let mut iter = self.variants().iter().peekable();
+        while let Some(variant) =
+            iter.next().or_else(|| constified_variants.pop_front())
+        {
+            if variant.hidden() {
+                continue;
+            }
+
+            if variant.force_constification() && iter.peek().is_some() {
+                constified_variants.push_back(variant);
+                continue;
+            }
+
+            match seen_values.entry(variant.val()) {
+                Entry::Occupied(ref entry) => {
+                    if variation.is_rust() {
+                        let variant_name = ctx.rust_mangle(variant.name());
+                        let mangled_name =
+                            if is_toplevel || enum_ty.name().is_some() {
+                                variant_name
+                            } else {
+                                let parent_name =
+                                    parent_canonical_name.as_ref().unwrap();
+
+                                Cow::Owned(format!(
+                                    "{}_{}",
+                                    parent_name, variant_name
+                                ))
+                            };
+
+                        let existing_variant_name = entry.get();
+                        // Use associated constants for named enums.
+                        if enum_ty.name().is_some() &&
+                            ctx.options().rust_features().associated_const
+                        {
+                            let enum_canonical_name = &ident;
+                            let variant_name =
+                                ctx.rust_ident_raw(&*mangled_name);
+                            result.push(quote! {
+                                impl #enum_rust_ty {
+                                    pub const #variant_name : #enum_rust_ty =
+                                        #enum_canonical_name :: #existing_variant_name ;
+                                }
+                            });
+                        } else {
+                            add_constant(
+                                ctx,
+                                enum_ty,
+                                &ident,
+                                &Ident::new(&mangled_name, Span::call_site()),
+                                existing_variant_name,
+                                enum_rust_ty.clone(),
+                                result,
+                            );
+                        }
+                    } else {
+                        builder = builder.with_variant(
+                            ctx,
+                            variant,
+                            constant_mangling_prefix,
+                            enum_rust_ty.clone(),
+                            result,
+                            enum_ty.name().is_some(),
+                        );
+                    }
+                }
+                Entry::Vacant(entry) => {
+                    builder = builder.with_variant(
+                        ctx,
+                        variant,
+                        constant_mangling_prefix,
+                        enum_rust_ty.clone(),
+                        result,
+                        enum_ty.name().is_some(),
+                    );
+
+                    let variant_name = ctx.rust_ident(variant.name());
+
+                    // If it's an unnamed enum, or constification is enforced,
+                    // we also generate a constant so it can be properly
+                    // accessed.
+                    if (variation.is_rust() && enum_ty.name().is_none()) ||
+                        variant.force_constification()
+                    {
+                        let mangled_name = if is_toplevel {
+                            variant_name.clone()
+                        } else {
+                            let parent_name =
+                                parent_canonical_name.as_ref().unwrap();
+
+                            Ident::new(
+                                &format!("{}_{}", parent_name, variant_name),
+                                Span::call_site(),
+                            )
+                        };
+
+                        add_constant(
+                            ctx,
+                            enum_ty,
+                            &ident,
+                            &mangled_name,
+                            &variant_name,
+                            enum_rust_ty.clone(),
+                            result,
+                        );
+                    }
+
+                    entry.insert(variant_name);
+                }
+            }
+        }
+
+        let item = builder.build(ctx, enum_rust_ty, result);
+        result.push(item);
+    }
+}
+
+/// Enum for the default type of macro constants.
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub enum MacroTypeVariation {
+    /// Use i32 or i64
+    Signed,
+    /// Use u32 or u64
+    Unsigned,
+}
+
+impl fmt::Display for MacroTypeVariation {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let s = match self {
+            Self::Signed => "signed",
+            Self::Unsigned => "unsigned",
+        };
+        s.fmt(f)
+    }
+}
+
+impl Default for MacroTypeVariation {
+    fn default() -> MacroTypeVariation {
+        MacroTypeVariation::Unsigned
+    }
+}
+
+impl std::str::FromStr for MacroTypeVariation {
+    type Err = std::io::Error;
+
+    /// Create a `MacroTypeVariation` from a string.
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s {
+            "signed" => Ok(MacroTypeVariation::Signed),
+            "unsigned" => Ok(MacroTypeVariation::Unsigned),
+            _ => Err(std::io::Error::new(
+                std::io::ErrorKind::InvalidInput,
+                concat!(
+                    "Got an invalid MacroTypeVariation. Accepted values ",
+                    "are 'signed' and 'unsigned'"
+                ),
+            )),
+        }
+    }
+}
+
+/// Enum for how aliases should be translated.
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub enum AliasVariation {
+    /// Convert to regular Rust alias
+    TypeAlias,
+    /// Create a new type by wrapping the old type in a struct and using #[repr(transparent)]
+    NewType,
+    /// Same as `NewType` but also impl Deref to be able to use the methods of the wrapped type
+    NewTypeDeref,
+}
+
+impl fmt::Display for AliasVariation {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let s = match self {
+            Self::TypeAlias => "type_alias",
+            Self::NewType => "new_type",
+            Self::NewTypeDeref => "new_type_deref",
+        };
+
+        s.fmt(f)
+    }
+}
+
+impl Default for AliasVariation {
+    fn default() -> AliasVariation {
+        AliasVariation::TypeAlias
+    }
+}
+
+impl std::str::FromStr for AliasVariation {
+    type Err = std::io::Error;
+
+    /// Create an `AliasVariation` from a string.
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s {
+            "type_alias" => Ok(AliasVariation::TypeAlias),
+            "new_type" => Ok(AliasVariation::NewType),
+            "new_type_deref" => Ok(AliasVariation::NewTypeDeref),
+            _ => Err(std::io::Error::new(
+                std::io::ErrorKind::InvalidInput,
+                concat!(
+                    "Got an invalid AliasVariation. Accepted values ",
+                    "are 'type_alias', 'new_type', and 'new_type_deref'"
+                ),
+            )),
+        }
+    }
+}
+
+/// Enum for how non-`Copy` `union`s should be translated.
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub enum NonCopyUnionStyle {
+    /// Wrap members in a type generated by `bindgen`.
+    BindgenWrapper,
+    /// Wrap members in [`::core::mem::ManuallyDrop`].
+    ///
+    /// Note: `ManuallyDrop` was stabilized in Rust 1.20.0, do not use it if your
+    /// MSRV is lower.
+    ManuallyDrop,
+}
+
+impl fmt::Display for NonCopyUnionStyle {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let s = match self {
+            Self::BindgenWrapper => "bindgen_wrapper",
+            Self::ManuallyDrop => "manually_drop",
+        };
+
+        s.fmt(f)
+    }
+}
+
+impl Default for NonCopyUnionStyle {
+    fn default() -> Self {
+        Self::BindgenWrapper
+    }
+}
+
+impl std::str::FromStr for NonCopyUnionStyle {
+    type Err = std::io::Error;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s {
+            "bindgen_wrapper" => Ok(Self::BindgenWrapper),
+            "manually_drop" => Ok(Self::ManuallyDrop),
+            _ => Err(std::io::Error::new(
+                std::io::ErrorKind::InvalidInput,
+                concat!(
+                    "Got an invalid NonCopyUnionStyle. Accepted values ",
+                    "are 'bindgen_wrapper' and 'manually_drop'"
+                ),
+            )),
+        }
+    }
+}
+
+/// Fallible conversion to an opaque blob.
+///
+/// Implementors of this trait should provide the `try_get_layout` method to
+/// fallibly get this thing's layout, which the provided `try_to_opaque` trait
+/// method will use to convert the `Layout` into an opaque blob Rust type.
+trait TryToOpaque {
+    type Extra;
+
+    /// Get the layout for this thing, if one is available.
+    fn try_get_layout(
+        &self,
+        ctx: &BindgenContext,
+        extra: &Self::Extra,
+    ) -> error::Result<Layout>;
+
+    /// Do not override this provided trait method.
+    fn try_to_opaque(
+        &self,
+        ctx: &BindgenContext,
+        extra: &Self::Extra,
+    ) -> error::Result<proc_macro2::TokenStream> {
+        self.try_get_layout(ctx, extra)
+            .map(|layout| helpers::blob(ctx, layout))
+    }
+}
+
+/// Infallible conversion of an IR thing to an opaque blob.
+///
+/// The resulting layout is best effort, and is unfortunately not guaranteed to
+/// be correct. When all else fails, we fall back to a single byte layout as a
+/// last resort, because C++ does not permit zero-sized types. See the note in
+/// the `ToRustTyOrOpaque` doc comment about fallible versus infallible traits
+/// and when each is appropriate.
+///
+/// Don't implement this directly. Instead implement `TryToOpaque`, and then
+/// leverage the blanket impl for this trait.
+trait ToOpaque: TryToOpaque {
+    fn get_layout(&self, ctx: &BindgenContext, extra: &Self::Extra) -> Layout {
+        self.try_get_layout(ctx, extra)
+            .unwrap_or_else(|_| Layout::for_size(ctx, 1))
+    }
+
+    fn to_opaque(
+        &self,
+        ctx: &BindgenContext,
+        extra: &Self::Extra,
+    ) -> proc_macro2::TokenStream {
+        let layout = self.get_layout(ctx, extra);
+        helpers::blob(ctx, layout)
+    }
+}
+
+impl<T> ToOpaque for T where T: TryToOpaque {}
+
+/// Fallible conversion from an IR thing to an *equivalent* Rust type.
+///
+/// If the C/C++ construct represented by the IR thing cannot (currently) be
+/// represented in Rust (for example, instantiations of templates with
+/// const-value generic parameters) then the impl should return an `Err`. It
+/// should *not* attempt to return an opaque blob with the correct size and
+/// alignment. That is the responsibility of the `TryToOpaque` trait.
+trait TryToRustTy {
+    type Extra;
+
+    fn try_to_rust_ty(
+        &self,
+        ctx: &BindgenContext,
+        extra: &Self::Extra,
+    ) -> error::Result<proc_macro2::TokenStream>;
+}
+
+/// Fallible conversion to a Rust type or an opaque blob with the correct size
+/// and alignment.
+///
+/// Don't implement this directly. Instead implement `TryToRustTy` and
+/// `TryToOpaque`, and then leverage the blanket impl for this trait below.
+trait TryToRustTyOrOpaque: TryToRustTy + TryToOpaque {
+    type Extra;
+
+    fn try_to_rust_ty_or_opaque(
+        &self,
+        ctx: &BindgenContext,
+        extra: &<Self as TryToRustTyOrOpaque>::Extra,
+    ) -> error::Result<proc_macro2::TokenStream>;
+}
+
+impl<E, T> TryToRustTyOrOpaque for T
+where
+    T: TryToRustTy<Extra = E> + TryToOpaque<Extra = E>,
+{
+    type Extra = E;
+
+    fn try_to_rust_ty_or_opaque(
+        &self,
+        ctx: &BindgenContext,
+        extra: &E,
+    ) -> error::Result<proc_macro2::TokenStream> {
+        self.try_to_rust_ty(ctx, extra).or_else(|_| {
+            if let Ok(layout) = self.try_get_layout(ctx, extra) {
+                Ok(helpers::blob(ctx, layout))
+            } else {
+                Err(error::Error::NoLayoutForOpaqueBlob)
+            }
+        })
+    }
+}
+
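+// As a concrete illustration (a sketch, not part of this change): a C++
+// construct that has a known layout but no equivalent Rust type, say a
+// hypothetical `std::atomic<int>` member treated as opaque, falls through to
+// helpers::blob() and comes out as a fixed-size array with matching
+// size/alignment, roughly:
+//
+//     pub type __BindgenOpaqueAtomic = [u8; 4usize];
+//
+// while anything try_to_rust_ty() can express is emitted as a real Rust type.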
+/// Infallible conversion to a Rust type, or an opaque blob with a best effort
+/// of correct size and alignment.
+///
+/// Don't implement this directly. Instead implement `TryToRustTy` and
+/// `TryToOpaque`, and then leverage the blanket impl for this trait below.
+///
+/// ### Fallible vs. Infallible Conversions to Rust Types
+///
+/// When should one use this infallible `ToRustTyOrOpaque` trait versus the
+/// fallible `TryTo{RustTy, Opaque, RustTyOrOpaque}` traits? All fallible trait
+/// implementations that need to convert another thing into a Rust type or
+/// opaque blob in a nested manner should also use fallible trait methods and
+/// propagate failure up the stack. Only infallible functions and methods like
+/// CodeGenerator implementations should use the infallible
+/// `ToRustTyOrOpaque`. The further out we push error recovery, the more likely
+/// we are to get a usable `Layout` even if we can't generate an equivalent Rust
+/// type for a C++ construct.
+trait ToRustTyOrOpaque: TryToRustTy + ToOpaque {
+    type Extra;
+
+    fn to_rust_ty_or_opaque(
+        &self,
+        ctx: &BindgenContext,
+        extra: &<Self as ToRustTyOrOpaque>::Extra,
+    ) -> proc_macro2::TokenStream;
+}
+
+impl<E, T> ToRustTyOrOpaque for T
+where
+    T: TryToRustTy<Extra = E> + ToOpaque<Extra = E>,
+{
+    type Extra = E;
+
+    fn to_rust_ty_or_opaque(
+        &self,
+        ctx: &BindgenContext,
+        extra: &E,
+    ) -> proc_macro2::TokenStream {
+        self.try_to_rust_ty(ctx, extra)
+            .unwrap_or_else(|_| self.to_opaque(ctx, extra))
+    }
+}
+
+impl<T> TryToOpaque for T
+where
+    T: Copy + Into<ItemId>,
+{
+    type Extra = ();
+
+    fn try_get_layout(
+        &self,
+        ctx: &BindgenContext,
+        _: &(),
+    ) -> error::Result<Layout> {
+        ctx.resolve_item((*self).into()).try_get_layout(ctx, &())
+    }
+}
+
+impl<T> TryToRustTy for T
+where
+    T: Copy + Into<ItemId>,
+{
+    type Extra = ();
+
+    fn try_to_rust_ty(
+        &self,
+        ctx: &BindgenContext,
+        _: &(),
+    ) -> error::Result<proc_macro2::TokenStream> {
+        ctx.resolve_item((*self).into()).try_to_rust_ty(ctx, &())
+    }
+}
+
+impl TryToOpaque for Item {
+    type Extra = ();
+
+    fn try_get_layout(
+        &self,
+        ctx: &BindgenContext,
+        _: &(),
+    ) -> error::Result<Layout> {
+        self.kind().expect_type().try_get_layout(ctx, self)
+    }
+}
+
+impl TryToRustTy for Item {
+    type Extra = ();
+
+    fn try_to_rust_ty(
+        &self,
+        ctx: &BindgenContext,
+        _: &(),
+    ) -> error::Result<proc_macro2::TokenStream> {
+        self.kind().expect_type().try_to_rust_ty(ctx, self)
+    }
+}
+
+impl TryToOpaque for Type {
+    type Extra = Item;
+
+    fn try_get_layout(
+        &self,
+        ctx: &BindgenContext,
+        _: &Item,
+    ) -> error::Result<Layout> {
+        self.layout(ctx).ok_or(error::Error::NoLayoutForOpaqueBlob)
+    }
+}
+
+impl TryToRustTy for Type {
+    type Extra = Item;
+
+    fn try_to_rust_ty(
+        &self,
+        ctx: &BindgenContext,
+        item: &Item,
+    ) -> error::Result<proc_macro2::TokenStream> {
+        use self::helpers::ast_ty::*;
+
+        match *self.kind() {
+            TypeKind::Void => Ok(c_void(ctx)),
+            // TODO: we should do something smart with nullptr, or maybe *const
+            // c_void is enough?
+            TypeKind::NullPtr => Ok(c_void(ctx).to_ptr(true)),
+            TypeKind::Int(ik) => {
+                match ik {
+                    IntKind::Bool => Ok(quote! { bool }),
+                    IntKind::Char { .. } => Ok(raw_type(ctx, "c_char")),
+                    IntKind::SChar => Ok(raw_type(ctx, "c_schar")),
+                    IntKind::UChar => Ok(raw_type(ctx, "c_uchar")),
+                    IntKind::Short => Ok(raw_type(ctx, "c_short")),
+                    IntKind::UShort => Ok(raw_type(ctx, "c_ushort")),
+                    IntKind::Int => Ok(raw_type(ctx, "c_int")),
+                    IntKind::UInt => Ok(raw_type(ctx, "c_uint")),
+                    IntKind::Long => Ok(raw_type(ctx, "c_long")),
+                    IntKind::ULong => Ok(raw_type(ctx, "c_ulong")),
+                    IntKind::LongLong => Ok(raw_type(ctx, "c_longlong")),
+                    IntKind::ULongLong => Ok(raw_type(ctx, "c_ulonglong")),
+                    IntKind::WChar => {
+                        let layout = self
+                            .layout(ctx)
+                            .expect("Couldn't compute wchar_t's layout?");
+                        let ty = Layout::known_type_for_size(ctx, layout.size)
+                            .expect("Non-representable wchar_t?");
+                        let ident = ctx.rust_ident_raw(ty);
+                        Ok(quote! { #ident })
+                    }
+
+                    IntKind::I8 => Ok(quote! { i8 }),
+                    IntKind::U8 => Ok(quote! { u8 }),
+                    IntKind::I16 => Ok(quote! { i16 }),
+                    IntKind::U16 => Ok(quote! { u16 }),
+                    IntKind::I32 => Ok(quote! { i32 }),
+                    IntKind::U32 => Ok(quote! { u32 }),
+                    IntKind::I64 => Ok(quote! { i64 }),
+                    IntKind::U64 => Ok(quote! { u64 }),
+                    IntKind::Custom { name, .. } => {
+                        Ok(proc_macro2::TokenStream::from_str(name).unwrap())
+                    }
+                    IntKind::U128 => {
+                        Ok(if ctx.options().rust_features.i128_and_u128 {
+                            quote! { u128 }
+                        } else {
+                            // Best effort thing, but wrong alignment
+                            // unfortunately.
+                            quote! { [u64; 2] }
+                        })
+                    }
+                    IntKind::I128 => {
+                        Ok(if ctx.options().rust_features.i128_and_u128 {
+                            quote! { i128 }
+                        } else {
+                            quote! { [u64; 2] }
+                        })
+                    }
+                }
+            }
+            TypeKind::Float(fk) => {
+                Ok(float_kind_rust_type(ctx, fk, self.layout(ctx)))
+            }
+            TypeKind::Complex(fk) => {
+                let float_path =
+                    float_kind_rust_type(ctx, fk, self.layout(ctx));
+
+                ctx.generated_bindgen_complex();
+                Ok(if ctx.options().enable_cxx_namespaces {
+                    quote! {
+                        root::__BindgenComplex<#float_path>
+                    }
+                } else {
+                    quote! {
+                        __BindgenComplex<#float_path>
+                    }
+                })
+            }
+            TypeKind::Function(ref fs) => {
+                // We can't rely on the sizeof(Option<NonZero<_>>) ==
+                // sizeof(NonZero<_>) optimization with opaque blobs (because
+                // they aren't NonZero), so don't *ever* use an or_opaque
+                // variant here.
+                let ty = fs.try_to_rust_ty(ctx, &())?;
+
+                let prefix = ctx.trait_prefix();
+                Ok(quote! {
+                    ::#prefix::option::Option<#ty>
+                })
+            }
+            TypeKind::Array(item, len) | TypeKind::Vector(item, len) => {
+                let ty = item.try_to_rust_ty(ctx, &())?;
+                Ok(quote! {
+                    [ #ty ; #len ]
+                })
+            }
+            TypeKind::Enum(..) => {
+                let path = item.namespace_aware_canonical_path(ctx);
+                let path = proc_macro2::TokenStream::from_str(&path.join("::"))
+                    .unwrap();
+                Ok(quote!(#path))
+            }
+            TypeKind::TemplateInstantiation(ref inst) => {
+                inst.try_to_rust_ty(ctx, item)
+            }
+            TypeKind::ResolvedTypeRef(inner) => inner.try_to_rust_ty(ctx, &()),
+            TypeKind::TemplateAlias(..) |
+            TypeKind::Alias(..) |
+            TypeKind::BlockPointer(..) => {
+                if self.is_block_pointer() && !ctx.options().generate_block {
+                    let void = c_void(ctx);
+                    return Ok(void.to_ptr(/* is_const = */ false));
+                }
+
+                if item.is_opaque(ctx, &()) &&
+                    item.used_template_params(ctx)
+                        .into_iter()
+                        .any(|param| param.is_template_param(ctx, &()))
+                {
+                    self.try_to_opaque(ctx, item)
+                } else if let Some(ty) = self
+                    .name()
+                    .and_then(|name| utils::type_from_named(ctx, name))
+                {
+                    Ok(ty)
+                } else {
+                    utils::build_path(item, ctx)
+                }
+            }
+            TypeKind::Comp(ref info) => {
+                let template_params = item.all_template_params(ctx);
+                if info.has_non_type_template_params() ||
+                    (item.is_opaque(ctx, &()) && !template_params.is_empty())
+                {
+                    return self.try_to_opaque(ctx, item);
+                }
+
+                utils::build_path(item, ctx)
+            }
+            TypeKind::Opaque => self.try_to_opaque(ctx, item),
+            TypeKind::Pointer(inner) | TypeKind::Reference(inner) => {
+                let is_const = ctx.resolve_type(inner).is_const();
+
+                let inner =
+                    inner.into_resolver().through_type_refs().resolve(ctx);
+                let inner_ty = inner.expect_type();
+
+                let is_objc_pointer =
+                    matches!(inner_ty.kind(), TypeKind::ObjCInterface(..));
+
+                // Regardless if we can properly represent the inner type, we
+                // should always generate a proper pointer here, so use
+                // infallible conversion of the inner type.
+                let mut ty = inner.to_rust_ty_or_opaque(ctx, &());
+                ty.append_implicit_template_params(ctx, inner);
+
+                // Avoid the first function pointer level, since it's already
+                // represented in Rust.
+                if inner_ty.canonical_type(ctx).is_function() || is_objc_pointer
+                {
+                    Ok(ty)
+                } else {
+                    Ok(ty.to_ptr(is_const))
+                }
+            }
+            TypeKind::TypeParam => {
+                let name = item.canonical_name(ctx);
+                let ident = ctx.rust_ident(name);
+                Ok(quote! {
+                    #ident
+                })
+            }
+            TypeKind::ObjCSel => Ok(quote! {
+                objc::runtime::Sel
+            }),
+            TypeKind::ObjCId => Ok(quote! {
+                id
+            }),
+            TypeKind::ObjCInterface(ref interface) => {
+                let name = ctx.rust_ident(interface.name());
+                Ok(quote! {
+                    #name
+                })
+            }
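+            // For example (an illustrative sketch): given the C typedef
+            // `typedef int (*cb_t)(int);`, the TypeKind::Function arm above
+            // produces roughly
+            //
+            //     pub type cb_t = ::std::option::Option<
+            //         unsafe extern "C" fn(arg1: ::std::os::raw::c_int) -> ::std::os::raw::c_int,
+            //     >;
+            //
+            // so that a NULL function pointer maps onto None instead of an
+            // invalid fn value.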
+            ref u @ TypeKind::UnresolvedTypeRef(..) => {
+                unreachable!("Should have been resolved after parsing {:?}!", u)
+            }
+        }
+    }
+}
+
+impl TryToOpaque for TemplateInstantiation {
+    type Extra = Item;
+
+    fn try_get_layout(
+        &self,
+        ctx: &BindgenContext,
+        item: &Item,
+    ) -> error::Result<Layout> {
+        item.expect_type()
+            .layout(ctx)
+            .ok_or(error::Error::NoLayoutForOpaqueBlob)
+    }
+}
+
+impl TryToRustTy for TemplateInstantiation {
+    type Extra = Item;
+
+    fn try_to_rust_ty(
+        &self,
+        ctx: &BindgenContext,
+        item: &Item,
+    ) -> error::Result<proc_macro2::TokenStream> {
+        if self.is_opaque(ctx, item) {
+            return Err(error::Error::InstantiationOfOpaqueType);
+        }
+
+        let def = self
+            .template_definition()
+            .into_resolver()
+            .through_type_refs()
+            .resolve(ctx);
+
+        let mut ty = quote! {};
+        let def_path = def.namespace_aware_canonical_path(ctx);
+        ty.append_separated(
+            def_path.into_iter().map(|p| ctx.rust_ident(p)),
+            quote!(::),
+        );
+
+        let def_params = def.self_template_params(ctx);
+        if def_params.is_empty() {
+            // This can happen if we generated an opaque type for a partial
+            // template specialization, and we've hit an instantiation of
+            // that partial specialization.
+            extra_assert!(def.is_opaque(ctx, &()));
+            return Err(error::Error::InstantiationOfOpaqueType);
+        }
+
+        // TODO: If the definition type is a template class/struct
+        // definition's member template definition, it could rely on
+        // generic template parameters from its outer template
+        // class/struct. When we emit bindings for it, it could require
+        // *more* type arguments than we have here, and we will need to
+        // reconstruct them somehow. We don't have any means of doing
+        // that reconstruction at this time.
+
+        let template_args = self
+            .template_arguments()
+            .iter()
+            .zip(def_params.iter())
+            // Only pass type arguments for the type parameters that
+            // the def uses.
+            .filter(|&(_, param)| ctx.uses_template_parameter(def.id(), *param))
+            .map(|(arg, _)| {
+                let arg = arg.into_resolver().through_type_refs().resolve(ctx);
+                let mut ty = arg.try_to_rust_ty(ctx, &())?;
+                ty.append_implicit_template_params(ctx, arg);
+                Ok(ty)
+            })
+            .collect::<error::Result<Vec<_>>>()?;
+
+        if template_args.is_empty() {
+            return Ok(ty);
+        }
+
+        Ok(quote! {
+            #ty < #( #template_args ),* >
+        })
+    }
+}
+
+impl TryToRustTy for FunctionSig {
+    type Extra = ();
+
+    fn try_to_rust_ty(
+        &self,
+        ctx: &BindgenContext,
+        _: &(),
+    ) -> error::Result<proc_macro2::TokenStream> {
+        // TODO: we might want to consider ignoring the reference return value.
+        let ret = utils::fnsig_return_ty(ctx, self);
+        let arguments = utils::fnsig_arguments(ctx, self);
+
+        match self.abi(ctx, None) {
+            ClangAbi::Known(Abi::ThisCall)
+                if !ctx.options().rust_features().thiscall_abi =>
+            {
+                warn!("Skipping function with thiscall ABI that isn't supported by the configured Rust target");
+                Ok(proc_macro2::TokenStream::new())
+            }
+            ClangAbi::Known(Abi::Vectorcall)
+                if !ctx.options().rust_features().vectorcall_abi =>
+            {
+                warn!("Skipping function with vectorcall ABI that isn't supported by the configured Rust target");
+                Ok(proc_macro2::TokenStream::new())
+            }
+            ClangAbi::Known(Abi::CUnwind)
+                if !ctx.options().rust_features().c_unwind_abi =>
+            {
+                warn!("Skipping function with C-unwind ABI that isn't supported by the configured Rust target");
+                Ok(proc_macro2::TokenStream::new())
+            }
+            ClangAbi::Known(Abi::EfiApi)
+                if !ctx.options().rust_features().abi_efiapi =>
+            {
+                warn!("Skipping function with efiapi ABI that isn't supported by the configured Rust target");
+                Ok(proc_macro2::TokenStream::new())
+            }
+            abi => Ok(quote! {
+                unsafe extern #abi fn ( #( #arguments ),* ) #ret
+            }),
+        }
+    }
+}
+
+impl CodeGenerator for Function {
+    type Extra = Item;
+
+    /// If we've actually generated the symbol, the number of times we've seen
+    /// it.
+    type Return = Option<u32>;
+
+    fn codegen(
+        &self,
+        ctx: &BindgenContext,
+        result: &mut CodegenResult<'_>,
+        item: &Item,
+    ) -> Self::Return {
+        debug!("<Function as CodeGenerator>::codegen: item = {:?}", item);
+        debug_assert!(item.is_enabled_for_codegen(ctx));
+
+        let is_internal = matches!(self.linkage(), Linkage::Internal);
+
+        if is_internal && !ctx.options().wrap_static_fns {
+            // We can't do anything with Internal functions if we are not wrapping them so just
+            // avoid generating anything for them.
+            return None;
+        }
+
+        // Pure virtual methods have no actual symbol, so we can't generate
+        // something meaningful for them.
+        let is_dynamic_function = match self.kind() {
+            FunctionKind::Method(ref method_kind)
+                if method_kind.is_pure_virtual() =>
+            {
+                return None;
+            }
+            FunctionKind::Function => {
+                ctx.options().dynamic_library_name.is_some()
+            }
+            _ => false,
+        };
+
+        // Similar to static member variables in a class template, we can't
+        // generate bindings to template functions, because the set of
+        // instantiations is open ended and we have no way of knowing which
+        // monomorphizations actually exist.
+        if !item.all_template_params(ctx).is_empty() {
+            return None;
+        }
+
+        let name = self.name();
+        let mut canonical_name = item.canonical_name(ctx);
+        let mangled_name = self.mangled_name();
+
+        {
+            let seen_symbol_name = mangled_name.unwrap_or(&canonical_name);
+
+            // TODO: Maybe warn here if there's a type/argument mismatch, or
+            // something?
+            if result.seen_function(seen_symbol_name) {
+                return None;
+            }
+            result.saw_function(seen_symbol_name);
+        }
+
+        let signature_item = ctx.resolve_item(self.signature());
+        let signature = signature_item.kind().expect_type().canonical_type(ctx);
+        let signature = match *signature.kind() {
+            TypeKind::Function(ref sig) => sig,
+            _ => panic!("Signature kind is not a Function: {:?}", signature),
+        };
+
+        let args = utils::fnsig_arguments(ctx, signature);
+        let ret = utils::fnsig_return_ty(ctx, signature);
+
+        let mut attributes = vec![];
+
+        if ctx.options().rust_features().must_use_function {
+            let must_use = signature.must_use() || {
+                let ret_ty = signature
+                    .return_type()
+                    .into_resolver()
+                    .through_type_refs()
+                    .resolve(ctx);
+                ret_ty.must_use(ctx)
+            };
+
+            if must_use {
+                attributes.push(attributes::must_use());
+            }
+        }
+
+        if let Some(comment) = item.comment(ctx) {
+            attributes.push(attributes::doc(comment));
+        }
+
+        let abi = match signature.abi(ctx, Some(name)) {
+            ClangAbi::Known(Abi::ThisCall)
+                if !ctx.options().rust_features().thiscall_abi =>
+            {
+                unsupported_abi_diagnostic::<false>(
+                    name,
+                    item.location(),
+                    "thiscall",
+                    ctx,
+                );
+                return None;
+            }
+            ClangAbi::Known(Abi::Vectorcall)
+                if !ctx.options().rust_features().vectorcall_abi =>
+            {
+                unsupported_abi_diagnostic::<false>(
+                    name,
+                    item.location(),
+                    "vectorcall",
+                    ctx,
+                );
+                return None;
+            }
+            ClangAbi::Known(Abi::CUnwind)
+                if !ctx.options().rust_features().c_unwind_abi =>
+            {
+                unsupported_abi_diagnostic::<false>(
+                    name,
+                    item.location(),
+                    "C-unwind",
+                    ctx,
+                );
+                return None;
+            }
+            ClangAbi::Known(Abi::EfiApi)
+                if !ctx.options().rust_features().abi_efiapi =>
+            {
+                unsupported_abi_diagnostic::<false>(
+                    name,
+                    item.location(),
+                    "efiapi",
+                    ctx,
+                );
+                return None;
+            }
+            ClangAbi::Known(Abi::Win64) if signature.is_variadic() => {
+                unsupported_abi_diagnostic::<true>(
+                    name,
+                    item.location(),
"Win64", + ctx, + ); + return None; + } + ClangAbi::Unknown(unknown_abi) => { + panic!( + "Invalid or unknown abi {:?} for function {:?} ({:?})", + unknown_abi, canonical_name, self + ); + } + abi => abi, + }; + + if is_internal && ctx.options().wrap_static_fns { + result.items_to_serialize.push(item.id()); + } + + // Handle overloaded functions by giving each overload its own unique + // suffix. + let times_seen = result.overload_number(&canonical_name); + if times_seen > 0 { + write!(&mut canonical_name, "{}", times_seen).unwrap(); + } + + let mut has_link_name_attr = false; + if let Some(link_name) = self.link_name() { + attributes.push(attributes::link_name::(link_name)); + has_link_name_attr = true; + } else { + let link_name = mangled_name.unwrap_or(name); + if !is_dynamic_function && + !utils::names_will_be_identical_after_mangling( + &canonical_name, + link_name, + Some(abi), + ) + { + attributes.push(attributes::link_name::(link_name)); + has_link_name_attr = true; + } + } + + // Unfortunately this can't piggyback on the `attributes` list because + // the #[link(wasm_import_module)] needs to happen before the `extern + // "C"` block. It doesn't get picked up properly otherwise + let wasm_link_attribute = + ctx.options().wasm_import_module_name.as_ref().map(|name| { + quote! { #[link(wasm_import_module = #name)] } + }); + + if is_internal && ctx.options().wrap_static_fns && !has_link_name_attr { + let name = canonical_name.clone() + ctx.wrap_static_fns_suffix(); + attributes.push(attributes::link_name::(&name)); + } + + let ident = ctx.rust_ident(canonical_name); + let tokens = quote! { + #wasm_link_attribute + extern #abi { + #(#attributes)* + pub fn #ident ( #( #args ),* ) #ret; + } + }; + + // If we're doing dynamic binding generation, add to the dynamic items. 
+        if is_dynamic_function {
+            let args_identifiers =
+                utils::fnsig_argument_identifiers(ctx, signature);
+            let ret_ty = utils::fnsig_return_ty(ctx, signature);
+            result.dynamic_items().push(
+                ident,
+                abi,
+                signature.is_variadic(),
+                ctx.options().dynamic_link_require_all,
+                args,
+                args_identifiers,
+                ret,
+                ret_ty,
+                attributes,
+                ctx,
+            );
+        } else {
+            result.push(tokens);
+        }
+        Some(times_seen)
+    }
+}
+
+fn unsupported_abi_diagnostic<const VARIADIC: bool>(
+    fn_name: &str,
+    _location: Option<&crate::clang::SourceLocation>,
+    abi: &str,
+    _ctx: &BindgenContext,
+) {
+    warn!(
+        "Skipping {}function `{}` with the {} ABI that isn't supported by the configured Rust target",
+        if VARIADIC { "variadic " } else { "" },
+        fn_name,
+        abi
+    );
+
+    #[cfg(feature = "experimental")]
+    if _ctx.options().emit_diagnostics {
+        use crate::diagnostics::{get_line, Diagnostic, Level, Slice};
+
+        let mut diag = Diagnostic::default();
+        diag
+            .with_title(format!(
+                "The `{}` {}function uses the {} ABI which is not supported by the configured Rust target.",
+                fn_name,
+                if VARIADIC { "variadic " } else { "" },
+                abi), Level::Warn)
+            .add_annotation("No code will be generated for this function.", Level::Warn)
+            .add_annotation(format!("The configured Rust version is {}.", String::from(_ctx.options().rust_target)), Level::Note);
+
+        if let Some(loc) = _location {
+            let (file, line, col, _) = loc.location();
+
+            if let Some(filename) = file.name() {
+                if let Ok(Some(source)) = get_line(&filename, line) {
+                    let mut slice = Slice::default();
+                    slice
+                        .with_source(source)
+                        .with_location(filename, line, col);
+                    diag.add_slice(slice);
+                }
+            }
+        }
+
+        diag.display()
+    }
+}
+
+fn objc_method_codegen(
+    ctx: &BindgenContext,
+    method: &ObjCMethod,
+    methods: &mut Vec<proc_macro2::TokenStream>,
+    class_name: Option<&str>,
+    rust_class_name: &str,
+    prefix: &str,
+) {
+    // This would ideally resolve the method into an Item, and use
+    // Item::process_before_codegen; however, ObjC methods are not currently
+    // made into function items.
+    let name = format!("{}::{}{}", rust_class_name, prefix, method.rust_name());
+    if ctx.options().blocklisted_items.matches(name) {
+        return;
+    }
+
+    let signature = method.signature();
+    let fn_args = utils::fnsig_arguments(ctx, signature);
+    let fn_ret = utils::fnsig_return_ty(ctx, signature);
+
+    let sig = if method.is_class_method() {
+        quote! {
+            ( #( #fn_args ),* ) #fn_ret
+        }
+    } else {
+        let self_arr = [quote! { &self }];
+        let args = self_arr.iter().chain(fn_args.iter());
+        quote! {
+            ( #( #args ),* ) #fn_ret
+        }
+    };
+
+    let methods_and_args = method.format_method_call(&fn_args);
+
+    let body = {
+        let body = if method.is_class_method() {
+            let class_name = ctx.rust_ident(
+                class_name
+                    .expect("Generating a class method without class name?"),
+            );
+            quote!(msg_send!(class!(#class_name), #methods_and_args))
+        } else {
+            quote!(msg_send!(*self, #methods_and_args))
+        };
+
+        ctx.wrap_unsafe_ops(body)
+    };
+
+    let method_name =
+        ctx.rust_ident(format!("{}{}", prefix, method.rust_name()));
+
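+    // Sketch of the emitted shape (illustrative, for a hypothetical ObjC
+    // selector `count` on some class):
+    //
+    //     unsafe fn count(&self) -> u64
+    //     where
+    //         <Self as std::ops::Deref>::Target: objc::Message + Sized,
+    //     {
+    //         msg_send!(*self, count)
+    //     }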
+    methods.push(quote! {
+        unsafe fn #method_name #sig where <Self as std::ops::Deref>::Target: objc::Message + Sized {
+            #body
+        }
+    });
+}
+
+impl CodeGenerator for ObjCInterface {
+    type Extra = Item;
+    type Return = ();
+
+    fn codegen(
+        &self,
+        ctx: &BindgenContext,
+        result: &mut CodegenResult<'_>,
+        item: &Item,
+    ) {
+        debug_assert!(item.is_enabled_for_codegen(ctx));
+
+        let mut impl_items = vec![];
+        let rust_class_name = item.path_for_allowlisting(ctx)[1..].join("::");
+
+        for method in self.methods() {
+            objc_method_codegen(
+                ctx,
+                method,
+                &mut impl_items,
+                None,
+                &rust_class_name,
+                "",
+            );
+        }
+
+        for class_method in self.class_methods() {
+            let ambiguity = self
+                .methods()
+                .iter()
+                .map(|m| m.rust_name())
+                .any(|x| x == class_method.rust_name());
+            let prefix = if ambiguity { "class_" } else { "" };
+            objc_method_codegen(
+                ctx,
+                class_method,
+                &mut impl_items,
+                Some(self.name()),
+                &rust_class_name,
+                prefix,
+            );
+        }
+
+        let trait_name = ctx.rust_ident(self.rust_name());
+        let trait_constraints = quote! {
+            Sized + std::ops::Deref
+        };
+        let trait_block = if self.is_template() {
+            let template_names: Vec<Ident> = self
+                .template_names
+                .iter()
+                .map(|g| ctx.rust_ident(g))
+                .collect();
+
+            quote! {
+                pub trait #trait_name <#(#template_names:'static),*> : #trait_constraints {
+                    #( #impl_items )*
+                }
+            }
+        } else {
+            quote! {
+                pub trait #trait_name : #trait_constraints {
+                    #( #impl_items )*
+                }
+            }
+        };
+
+        let class_name = ctx.rust_ident(self.name());
+        if !self.is_category() && !self.is_protocol() {
+            let struct_block = quote! {
+                #[repr(transparent)]
+                #[derive(Debug, Copy, Clone)]
+                pub struct #class_name(pub id);
+                impl std::ops::Deref for #class_name {
+                    type Target = objc::runtime::Object;
+                    fn deref(&self) -> &Self::Target {
+                        unsafe {
+                            &*self.0
+                        }
+                    }
+                }
+                unsafe impl objc::Message for #class_name { }
+                impl #class_name {
+                    pub fn alloc() -> Self {
+                        Self(unsafe {
+                            msg_send!(class!(#class_name), alloc)
+                        })
+                    }
+                }
+            };
+            result.push(struct_block);
+            let mut protocol_set: HashSet<ItemId> = Default::default();
+            for protocol_id in self.conforms_to.iter() {
+                protocol_set.insert(*protocol_id);
+                let protocol_name = ctx.rust_ident(
+                    ctx.resolve_type(protocol_id.expect_type_id(ctx))
+                        .name()
+                        .unwrap(),
+                );
+                let impl_trait = quote! {
+                    impl #protocol_name for #class_name { }
+                };
+                result.push(impl_trait);
+            }
+            let mut parent_class = self.parent_class;
+            while let Some(parent_id) = parent_class {
+                let parent = parent_id
+                    .expect_type_id(ctx)
+                    .into_resolver()
+                    .through_type_refs()
+                    .resolve(ctx)
+                    .expect_type()
+                    .kind();
+
+                let parent = match parent {
+                    TypeKind::ObjCInterface(ref parent) => parent,
+                    _ => break,
+                };
+                parent_class = parent.parent_class;
+
+                let parent_name = ctx.rust_ident(parent.rust_name());
+                let impl_trait = if parent.is_template() {
+                    let template_names: Vec<Ident> = parent
+                        .template_names
+                        .iter()
+                        .map(|g| ctx.rust_ident(g))
+                        .collect();
+                    quote! {
+                        impl <#(#template_names :'static),*> #parent_name <#(#template_names),*> for #class_name {
+                        }
+                    }
+                } else {
+                    quote! {
+                        impl #parent_name for #class_name { }
+                    }
+                };
+                result.push(impl_trait);
+                for protocol_id in parent.conforms_to.iter() {
+                    if protocol_set.insert(*protocol_id) {
+                        let protocol_name = ctx.rust_ident(
+                            ctx.resolve_type(protocol_id.expect_type_id(ctx))
+                                .name()
+                                .unwrap(),
+                        );
+                        let impl_trait = quote! {
+                            impl #protocol_name for #class_name { }
+                        };
+                        result.push(impl_trait);
+                    }
+                }
+                if !parent.is_template() {
+                    let parent_struct_name = parent.name();
+                    let child_struct_name = self.name();
+                    let parent_struct = ctx.rust_ident(parent_struct_name);
+                    let from_block = quote! {
+                        impl From<#class_name> for #parent_struct {
+                            fn from(child: #class_name) -> #parent_struct {
+                                #parent_struct(child.0)
+                            }
+                        }
+                    };
+                    result.push(from_block);
+
+                    let error_msg = format!(
+                        "This {} cannot be downcasted to {}",
+                        parent_struct_name, child_struct_name
+                    );
+                    let try_into_block = quote! {
+                        impl std::convert::TryFrom<#parent_struct> for #class_name {
+                            type Error = &'static str;
+                            fn try_from(parent: #parent_struct) -> Result<#class_name, Self::Error> {
+                                let is_kind_of : bool = unsafe { msg_send!(parent, isKindOfClass:class!(#class_name))};
+                                if is_kind_of {
+                                    Ok(#class_name(parent.0))
+                                } else {
+                                    Err(#error_msg)
+                                }
+                            }
+                        }
+                    };
+                    result.push(try_into_block);
+                }
+            }
+        }
+
+        if !self.is_protocol() {
+            let impl_block = if self.is_template() {
+                let template_names: Vec<Ident> = self
+                    .template_names
+                    .iter()
+                    .map(|g| ctx.rust_ident(g))
+                    .collect();
+                quote! {
+                    impl <#(#template_names :'static),*> #trait_name <#(#template_names),*> for #class_name {
+                    }
+                }
+            } else {
+                quote! {
+                    impl #trait_name for #class_name {
+                    }
+                }
+            };
+            result.push(impl_block);
+        }
+
+        result.push(trait_block);
+        result.saw_objc();
+    }
+}
+
+pub(crate) fn codegen(
+    context: BindgenContext,
+) -> Result<(proc_macro2::TokenStream, BindgenOptions), CodegenError> {
+    context.gen(|context| {
+        let _t = context.timer("codegen");
+        let counter = Cell::new(0);
+        let mut result = CodegenResult::new(&counter);
+
+        debug!("codegen: {:?}", context.options());
+
+        if context.options().emit_ir {
+            let codegen_items = context.codegen_items();
+            for (id, item) in context.items() {
+                if codegen_items.contains(&id) {
+                    println!("ir: {:?} = {:#?}", id, item);
+                }
+            }
+        }
+
+        if let Some(path) = context.options().emit_ir_graphviz.as_ref() {
+            match dot::write_dot_file(context, path) {
+                Ok(()) => info!(
+                    "Your dot file was generated successfully into: {}",
+                    path
+                ),
+                Err(e) => warn!("{}", e),
+            }
+        }
+
+        if let Some(spec) = context.options().depfile.as_ref() {
+            match spec.write(context.deps()) {
+                Ok(()) => info!(
+                    "Your depfile was generated successfully into: {}",
+                    spec.depfile_path.display()
+                ),
+                Err(e) => warn!("{}", e),
+            }
+        }
+
+        context.resolve_item(context.root_module()).codegen(
+            context,
+            &mut result,
+            &(),
+        );
+
+        if let Some(ref lib_name) = context.options().dynamic_library_name {
+            let lib_ident = context.rust_ident(lib_name);
+            let dynamic_items_tokens =
+                result.dynamic_items().get_tokens(lib_ident, context);
+            result.push(dynamic_items_tokens);
+        }
+
+        utils::serialize_items(&result, context)?;
+
+        Ok(postprocessing::postprocessing(
+            result.items,
+            context.options(),
+        ))
+    })
+}
+
+pub(crate) mod utils {
+    use super::serialize::CSerialize;
+    use super::{error, CodegenError, CodegenResult, ToRustTyOrOpaque};
+    use crate::ir::context::BindgenContext;
+    use crate::ir::function::{Abi, ClangAbi, FunctionSig};
+    use crate::ir::item::{Item, ItemCanonicalPath};
+    use crate::ir::ty::TypeKind;
+    use crate::{args_are_cpp, file_is_cpp};
+    use std::borrow::Cow;
+    use std::io::Write;
+    use std::mem;
+    use std::path::PathBuf;
+    use std::str::FromStr;
+
+    pub(super) fn serialize_items(
+        result: &CodegenResult,
+        context: &BindgenContext,
+    ) -> Result<(), CodegenError> {
+        if result.items_to_serialize.is_empty() {
+            return Ok(());
+        }
+
+        let path = context
+            .options()
+            .wrap_static_fns_path
+            .as_ref()
+            .map(PathBuf::from)
+            .unwrap_or_else(|| {
+                std::env::temp_dir().join("bindgen").join("extern")
+            });
+
+        let dir = path.parent().unwrap();
+
+        if !dir.exists() {
+            std::fs::create_dir_all(dir)?;
+        }
+
+        let is_cpp = args_are_cpp(&context.options().clang_args) ||
+            context
+                .options()
+                .input_headers
+                .iter()
+                .any(|h| file_is_cpp(h));
+
+        let source_path = path.with_extension(if is_cpp { "cpp" } else { "c" });
+
+        let mut code = Vec::new();
+
+        if !context.options().input_headers.is_empty() {
+            for header in &context.options().input_headers {
+                writeln!(code, "#include \"{}\"", header)?;
+            }
+
+            writeln!(code)?;
+        }
+
+        if !context.options().input_header_contents.is_empty() {
+            for (name, contents) in &context.options().input_header_contents {
+                writeln!(code, "// {}\n{}", name, contents)?;
+            }
+
+            writeln!(code)?;
+        }
+
+        writeln!(code, "// Static wrappers\n")?;
+
+        for &id in &result.items_to_serialize {
+            let item = context.resolve_item(id);
+            item.serialize(context, (), &mut vec![], &mut code)?;
+        }
+
+        std::fs::write(source_path, code)?;
+
+        Ok(())
+    }
+
+    pub(crate) fn prepend_bitfield_unit_type(
+        ctx: &BindgenContext,
+        result: &mut Vec<proc_macro2::TokenStream>,
+    ) {
+        let bitfield_unit_src = include_str!("./bitfield_unit.rs");
+        let bitfield_unit_src = if ctx.options().rust_features().min_const_fn {
+            Cow::Borrowed(bitfield_unit_src)
+        } else {
+            Cow::Owned(bitfield_unit_src.replace("const fn ", "fn "))
+        };
+        let bitfield_unit_type =
+            proc_macro2::TokenStream::from_str(&bitfield_unit_src).unwrap();
+        let bitfield_unit_type = quote!(#bitfield_unit_type);
+
+        let items = vec![bitfield_unit_type];
+        let old_items = mem::replace(result, items);
+        result.extend(old_items);
+    }
+
+    pub(crate) fn prepend_objc_header(
+        ctx: &BindgenContext,
+        result: &mut Vec<proc_macro2::TokenStream>,
+    ) {
+        let use_objc = if ctx.options().objc_extern_crate {
+            quote! {
+                #[macro_use]
+                extern crate objc;
+            }
+        } else {
+            quote! {
+                use objc::{self, msg_send, sel, sel_impl, class};
+            }
+        };
+
+        let id_type = quote! {
+            #[allow(non_camel_case_types)]
+            pub type id = *mut objc::runtime::Object;
+        };
+
+        let items = vec![use_objc, id_type];
+        let old_items = mem::replace(result, items);
+        result.extend(old_items.into_iter());
+    }
+
+    pub(crate) fn prepend_block_header(
+        ctx: &BindgenContext,
+        result: &mut Vec<proc_macro2::TokenStream>,
+    ) {
+        let use_block = if ctx.options().block_extern_crate {
+            quote! {
+                extern crate block;
+            }
+        } else {
+            quote! {
+                use block;
+            }
+        };
+
+        let items = vec![use_block];
+        let old_items = mem::replace(result, items);
+        result.extend(old_items.into_iter());
+    }
+
+    pub(crate) fn prepend_union_types(
+        ctx: &BindgenContext,
+        result: &mut Vec<proc_macro2::TokenStream>,
+    ) {
+        let prefix = ctx.trait_prefix();
+
+        // If the target supports `const fn`, declare eligible functions
+        // as `const fn` else just `fn`.
+        let const_fn = if ctx.options().rust_features().min_const_fn {
+            quote! { const fn }
+        } else {
+            quote! { fn }
+        };
+
+        // TODO(emilio): The fmt::Debug impl could be way nicer with
+        // std::intrinsics::type_name, but...
+        let union_field_decl = quote! {
+            #[repr(C)]
+            pub struct __BindgenUnionField<T>(::#prefix::marker::PhantomData<T>);
+        };
+
+        let transmute =
+            ctx.wrap_unsafe_ops(quote!(::#prefix::mem::transmute(self)));
+
+        let union_field_impl = quote! {
+
+    pub(crate) fn prepend_union_types(
+        ctx: &BindgenContext,
+        result: &mut Vec<proc_macro2::TokenStream>,
+    ) {
+        let prefix = ctx.trait_prefix();
+
+        // If the target supports `const fn`, declare eligible functions
+        // as `const fn` else just `fn`.
+        let const_fn = if ctx.options().rust_features().min_const_fn {
+            quote! { const fn }
+        } else {
+            quote! { fn }
+        };
+
+        // TODO(emilio): The fmt::Debug impl could be way nicer with
+        // std::intrinsics::type_name, but...
+        let union_field_decl = quote! {
+            #[repr(C)]
+            pub struct __BindgenUnionField<T>(::#prefix::marker::PhantomData<T>);
+        };
+
+        let transmute =
+            ctx.wrap_unsafe_ops(quote!(::#prefix::mem::transmute(self)));
+
+        let union_field_impl = quote! {
+            impl<T> __BindgenUnionField<T> {
+                #[inline]
+                pub #const_fn new() -> Self {
+                    __BindgenUnionField(::#prefix::marker::PhantomData)
+                }
+
+                #[inline]
+                pub unsafe fn as_ref(&self) -> &T {
+                    #transmute
+                }
+
+                #[inline]
+                pub unsafe fn as_mut(&mut self) -> &mut T {
+                    #transmute
+                }
+            }
+        };
+
+        let union_field_default_impl = quote! {
+            impl<T> ::#prefix::default::Default for __BindgenUnionField<T> {
+                #[inline]
+                fn default() -> Self {
+                    Self::new()
+                }
+            }
+        };
+
+        let union_field_clone_impl = quote! {
+            impl<T> ::#prefix::clone::Clone for __BindgenUnionField<T> {
+                #[inline]
+                fn clone(&self) -> Self {
+                    Self::new()
+                }
+            }
+        };
+
+        let union_field_copy_impl = quote! {
+            impl<T> ::#prefix::marker::Copy for __BindgenUnionField<T> {}
+        };
+
+        let union_field_debug_impl = quote! {
+            impl<T> ::#prefix::fmt::Debug for __BindgenUnionField<T> {
+                fn fmt(&self, fmt: &mut ::#prefix::fmt::Formatter<'_>)
+                       -> ::#prefix::fmt::Result {
+                    fmt.write_str("__BindgenUnionField")
+                }
+            }
+        };
+
+        // The actual memory of the field will be hashed, so that's why this
+        // field doesn't do anything with the hash.
+        let union_field_hash_impl = quote! {
+            impl<T> ::#prefix::hash::Hash for __BindgenUnionField<T> {
+                fn hash<H: ::#prefix::hash::Hasher>(&self, _state: &mut H) {
+                }
+            }
+        };
+
+        let union_field_partialeq_impl = quote! {
+            impl<T> ::#prefix::cmp::PartialEq for __BindgenUnionField<T> {
+                fn eq(&self, _other: &__BindgenUnionField<T>) -> bool {
+                    true
+                }
+            }
+        };
+
+        let union_field_eq_impl = quote! {
+            impl<T> ::#prefix::cmp::Eq for __BindgenUnionField<T> {
+            }
+        };
+
+        let items = vec![
+            union_field_decl,
+            union_field_impl,
+            union_field_default_impl,
+            union_field_clone_impl,
+            union_field_copy_impl,
+            union_field_debug_impl,
+            union_field_hash_impl,
+            union_field_partialeq_impl,
+            union_field_eq_impl,
+        ];
+
+        let old_items = mem::replace(result, items);
+        result.extend(old_items.into_iter());
+    }
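To see why these impls are shaped this way: __BindgenUnionField is a zero-sized placeholder that lets bindings targeting pre-union-support Rust read a C union's storage through a transmute. A self-contained sketch of the generated pattern under that assumption (the Value struct and its field names are made up; real output comes from the quote! blocks above):

    use std::marker::PhantomData;

    // Condensed stand-in for the __BindgenUnionField<T> generated above.
    #[repr(C)]
    pub struct __BindgenUnionField<T>(PhantomData<T>);

    impl<T> __BindgenUnionField<T> {
        pub const fn new() -> Self {
            __BindgenUnionField(PhantomData)
        }
        // Reinterprets the enclosing storage (this ZST sits at offset 0)
        // as a T, exactly like the generated as_ref above.
        pub unsafe fn as_ref(&self) -> &T {
            std::mem::transmute(self)
        }
    }

    // Rough shape of a lowered `union Value { int i; float f; };`:
    // both zero-sized fields sit at offset 0, over the real storage.
    #[repr(C)]
    pub struct Value {
        pub i: __BindgenUnionField<i32>,
        pub f: __BindgenUnionField<f32>,
        pub bindgen_union_field: u32,
    }

    fn main() {
        let v = Value {
            i: __BindgenUnionField::new(),
            f: __BindgenUnionField::new(),
            bindgen_union_field: 0x3f80_0000, // bit pattern of 1.0f32
        };
        assert_eq!(unsafe { *v.f.as_ref() }, 1.0);
    }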
+
+    pub(crate) fn prepend_incomplete_array_types(
+        ctx: &BindgenContext,
+        result: &mut Vec<proc_macro2::TokenStream>,
+    ) {
+        let prefix = ctx.trait_prefix();
+
+        // If the target supports `const fn`, declare eligible functions
+        // as `const fn` else just `fn`.
+        let const_fn = if ctx.options().rust_features().min_const_fn {
+            quote! { const fn }
+        } else {
+            quote! { fn }
+        };
+
+        let incomplete_array_decl = quote! {
+            #[repr(C)]
+            #[derive(Default)]
+            pub struct __IncompleteArrayField<T>(
+                ::#prefix::marker::PhantomData<T>, [T; 0]);
+        };
+
+        let from_raw_parts = ctx.wrap_unsafe_ops(quote! (
+            ::#prefix::slice::from_raw_parts(self.as_ptr(), len)
+        ));
+        let from_raw_parts_mut = ctx.wrap_unsafe_ops(quote! (
+            ::#prefix::slice::from_raw_parts_mut(self.as_mut_ptr(), len)
+        ));
+
+        let incomplete_array_impl = quote! {
+            impl<T> __IncompleteArrayField<T> {
+                #[inline]
+                pub #const_fn new() -> Self {
+                    __IncompleteArrayField(::#prefix::marker::PhantomData, [])
+                }
+
+                #[inline]
+                pub fn as_ptr(&self) -> *const T {
+                    self as *const _ as *const T
+                }
+
+                #[inline]
+                pub fn as_mut_ptr(&mut self) -> *mut T {
+                    self as *mut _ as *mut T
+                }
+
+                #[inline]
+                pub unsafe fn as_slice(&self, len: usize) -> &[T] {
+                    #from_raw_parts
+                }
+
+                #[inline]
+                pub unsafe fn as_mut_slice(&mut self, len: usize) -> &mut [T] {
+                    #from_raw_parts_mut
+                }
+            }
+        };
+
+        let incomplete_array_debug_impl = quote! {
+            impl<T> ::#prefix::fmt::Debug for __IncompleteArrayField<T> {
+                fn fmt(&self, fmt: &mut ::#prefix::fmt::Formatter<'_>)
+                       -> ::#prefix::fmt::Result {
+                    fmt.write_str("__IncompleteArrayField")
+                }
+            }
+        };
+
+        let items = vec![
+            incomplete_array_decl,
+            incomplete_array_impl,
+            incomplete_array_debug_impl,
+        ];
+
+        let old_items = mem::replace(result, items);
+        result.extend(old_items.into_iter());
+    }
+
+    pub(crate) fn prepend_complex_type(
+        result: &mut Vec<proc_macro2::TokenStream>,
+    ) {
+        let complex_type = quote! {
+            #[derive(PartialEq, Copy, Clone, Hash, Debug, Default)]
+            #[repr(C)]
+            pub struct __BindgenComplex<T> {
+                pub re: T,
+                pub im: T
+            }
+        };
+
+        let items = vec![complex_type];
+        let old_items = mem::replace(result, items);
+        result.extend(old_items.into_iter());
+    }
+
+    pub(crate) fn build_path(
+        item: &Item,
+        ctx: &BindgenContext,
+    ) -> error::Result<proc_macro2::TokenStream> {
+        let path = item.namespace_aware_canonical_path(ctx);
+        let tokens =
+            proc_macro2::TokenStream::from_str(&path.join("::")).unwrap();
+
+        Ok(tokens)
+    }
+
+    fn primitive_ty(
+        ctx: &BindgenContext,
+        name: &str,
+    ) -> proc_macro2::TokenStream {
+        let ident = ctx.rust_ident_raw(name);
+        quote! {
+            #ident
+        }
+    }
+
+    pub(crate) fn type_from_named(
+        ctx: &BindgenContext,
+        name: &str,
+    ) -> Option<proc_macro2::TokenStream> {
+        // FIXME: We could use the inner item to check this is really a
+        // primitive type but, who the heck overrides these anyway?
+        Some(match name {
+            "int8_t" => primitive_ty(ctx, "i8"),
+            "uint8_t" => primitive_ty(ctx, "u8"),
+            "int16_t" => primitive_ty(ctx, "i16"),
+            "uint16_t" => primitive_ty(ctx, "u16"),
+            "int32_t" => primitive_ty(ctx, "i32"),
+            "uint32_t" => primitive_ty(ctx, "u32"),
+            "int64_t" => primitive_ty(ctx, "i64"),
+            "uint64_t" => primitive_ty(ctx, "u64"),
+
+            "size_t" if ctx.options().size_t_is_usize => {
+                primitive_ty(ctx, "usize")
+            }
+            "uintptr_t" => primitive_ty(ctx, "usize"),
+
+            "ssize_t" if ctx.options().size_t_is_usize => {
+                primitive_ty(ctx, "isize")
+            }
+            "intptr_t" | "ptrdiff_t" => primitive_ty(ctx, "isize"),
+            _ => return None,
+        })
+    }
+
+    fn fnsig_return_ty_internal(
+        ctx: &BindgenContext,
+        sig: &FunctionSig,
+        include_arrow: bool,
+    ) -> proc_macro2::TokenStream {
+        if sig.is_divergent() {
+            return if include_arrow {
+                quote! { -> ! }
+            } else {
+                quote! { ! }
+            };
+        }
+
+        let canonical_type_kind = sig
+            .return_type()
+            .into_resolver()
+            .through_type_refs()
+            .through_type_aliases()
+            .resolve(ctx)
+            .kind()
+            .expect_type()
+            .kind();
+
+        if let TypeKind::Void = canonical_type_kind {
+            return if include_arrow {
+                quote! {}
+            } else {
+                quote! { () }
+            };
+        }
+
+        let ret_ty = sig.return_type().to_rust_ty_or_opaque(ctx, &());
+        if include_arrow {
+            quote! { -> #ret_ty }
+        } else {
+            ret_ty
+        }
+    }
+
+    pub(crate) fn fnsig_return_ty(
+        ctx: &BindgenContext,
+        sig: &FunctionSig,
+    ) -> proc_macro2::TokenStream {
+        fnsig_return_ty_internal(ctx, sig, /* include_arrow = */ true)
+    }
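The three return-type cases handled above (divergent, void, everything else) map onto Rust declarations like the following sketch; the C prototypes in the comments are made up for illustration, not output taken from this patch:

    extern "C" {
        pub fn do_nothing();                         // void do_nothing(void): no arrow at all
        pub fn get_count() -> ::std::os::raw::c_int; // int get_count(void)
        pub fn fail_fast() -> !;                     // _Noreturn void fail_fast(void): divergent
    }

    fn main() {} // declarations only; nothing is called or linked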
+
+    pub(crate) fn fnsig_arguments(
+        ctx: &BindgenContext,
+        sig: &FunctionSig,
+    ) -> Vec<proc_macro2::TokenStream> {
+        use super::ToPtr;
+
+        let mut unnamed_arguments = 0;
+        let mut args = sig
+            .argument_types()
+            .iter()
+            .map(|&(ref name, ty)| {
+                let arg_item = ctx.resolve_item(ty);
+                let arg_ty = arg_item.kind().expect_type();
+
+                // From the C90 standard[1]:
+                //
+                //     A declaration of a parameter as "array of type" shall be
+                //     adjusted to "qualified pointer to type", where the type
+                //     qualifiers (if any) are those specified within the [ and
+                //     ] of the array type derivation.
+                //
+                // [1]: http://c0x.coding-guidelines.com/6.7.5.3.html
+                let arg_ty = match *arg_ty.canonical_type(ctx).kind() {
+                    TypeKind::Array(t, _) => {
+                        let stream =
+                            if ctx.options().array_pointers_in_arguments {
+                                arg_ty.to_rust_ty_or_opaque(ctx, arg_item)
+                            } else {
+                                t.to_rust_ty_or_opaque(ctx, &())
+                            };
+                        stream.to_ptr(ctx.resolve_type(t).is_const())
+                    }
+                    TypeKind::Pointer(inner) => {
+                        let inner = ctx.resolve_item(inner);
+                        let inner_ty = inner.expect_type();
+                        if let TypeKind::ObjCInterface(ref interface) =
+                            *inner_ty.canonical_type(ctx).kind()
+                        {
+                            let name = ctx.rust_ident(interface.name());
+                            quote! {
+                                #name
+                            }
+                        } else {
+                            arg_item.to_rust_ty_or_opaque(ctx, &())
+                        }
+                    }
+                    _ => arg_item.to_rust_ty_or_opaque(ctx, &()),
+                };
+
+                let arg_name = match *name {
+                    Some(ref name) => ctx.rust_mangle(name).into_owned(),
+                    None => {
+                        unnamed_arguments += 1;
+                        format!("arg{}", unnamed_arguments)
+                    }
+                };
+
+                assert!(!arg_name.is_empty());
+                let arg_name = ctx.rust_ident(arg_name);
+
+                quote! {
+                    #arg_name : #arg_ty
+                }
+            })
+            .collect::<Vec<_>>();
+
+        if sig.is_variadic() {
+            args.push(quote! { ... })
+        }
+
+        args
+    }
+
+    pub(crate) fn fnsig_argument_identifiers(
+        ctx: &BindgenContext,
+        sig: &FunctionSig,
+    ) -> Vec<proc_macro2::TokenStream> {
+        let mut unnamed_arguments = 0;
+        let args = sig
+            .argument_types()
+            .iter()
+            .map(|&(ref name, _ty)| {
+                let arg_name = match *name {
+                    Some(ref name) => ctx.rust_mangle(name).into_owned(),
+                    None => {
+                        unnamed_arguments += 1;
+                        format!("arg{}", unnamed_arguments)
+                    }
+                };
+
+                assert!(!arg_name.is_empty());
+                let arg_name = ctx.rust_ident(arg_name);
+
+                quote! {
+                    #arg_name
+                }
+            })
+            .collect::<Vec<_>>();
+
+        args
+    }
+
+    pub(crate) fn fnsig_block(
+        ctx: &BindgenContext,
+        sig: &FunctionSig,
+    ) -> proc_macro2::TokenStream {
+        let args = sig.argument_types().iter().map(|&(_, ty)| {
+            let arg_item = ctx.resolve_item(ty);
+
+            arg_item.to_rust_ty_or_opaque(ctx, &())
+        });
+
+        let ret_ty = fnsig_return_ty_internal(
+            ctx, sig, /* include_arrow = */ false,
+        );
+        quote! {
+            *const ::block::Block<(#(#args,)*), #ret_ty>
+        }
+    }
+
+    // Returns true if `canonical_name` will end up as `mangled_name` at the
+    // machine code level, i.e. after LLVM has applied any target specific
+    // mangling.
+    pub(crate) fn names_will_be_identical_after_mangling(
+        canonical_name: &str,
+        mangled_name: &str,
+        call_conv: Option<ClangAbi>,
+    ) -> bool {
+        // If the mangled name and the canonical name are the same then no
+        // mangling can have happened between the two versions.
+        if canonical_name == mangled_name {
+            return true;
+        }
+
+        // Working with &[u8] makes indexing simpler than with &str
+        let canonical_name = canonical_name.as_bytes();
+        let mangled_name = mangled_name.as_bytes();
+
+        let (mangling_prefix, expect_suffix) = match call_conv {
+            Some(ClangAbi::Known(Abi::C)) |
+            // None is the case for global variables
+            None => {
+                (b'_', false)
+            }
+            Some(ClangAbi::Known(Abi::Stdcall)) => (b'_', true),
+            Some(ClangAbi::Known(Abi::Fastcall)) => (b'@', true),
+
+            // This is something we don't recognize, stay on the safe side
+            // by emitting the `#[link_name]` attribute
+            Some(_) => return false,
+        };
+
+        // Check that the mangled name is long enough to at least contain the
+        // canonical name plus the expected prefix.
+        if mangled_name.len() < canonical_name.len() + 1 {
+            return false;
+        }
+
+        // Return early if the mangled name does not start with the prefix
+        // expected for the given calling convention.
+        if mangled_name[0] != mangling_prefix {
+            return false;
+        }
+
+        // Check that the mangled name contains the canonical name after the
+        // prefix
+        if &mangled_name[1..canonical_name.len() + 1] != canonical_name {
+            return false;
+        }
+
+        // If the given calling convention also prescribes a suffix, check that
+        // it exists too
+        if expect_suffix {
+            let suffix = &mangled_name[canonical_name.len() + 1..];
+
+            // The shortest suffix is "@0"
+            if suffix.len() < 2 {
+                return false;
+            }
+
+            // Check that the suffix starts with '@' and is all ASCII decimals
+            // after that.
+            if suffix[0] != b'@' || !suffix[1..].iter().all(u8::is_ascii_digit)
+            {
+                return false;
+            }
+        } else if mangled_name.len() != canonical_name.len() + 1 {
+            // If we don't expect a suffix, the mangled name must be exactly
+            // the prefix plus the canonical name; anything longer needs the
+            // #[link_name] attribute
+            return false;
+        }
+
+        true
+    }
+}
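The prefix/suffix rules this helper encodes, shown on a made-up symbol name in a standalone sketch (not code from the patch):

    fn main() {
        // i686 cdecl: plain '_' prefix, no suffix.
        let cdecl = "_my_func";
        assert_eq!(cdecl.strip_prefix('_'), Some("my_func"));

        // stdcall: '_' prefix plus an "@<argument bytes>" suffix.
        let stdcall = "_my_func@12";
        let suffix = &stdcall["_my_func".len() + 1..]; // skips the '@'
        assert!(suffix.chars().all(|c| c.is_ascii_digit()));

        // fastcall: '@' prefix instead of '_', same kind of suffix.
        let fastcall = "@my_func@12";
        assert!(fastcall.starts_with('@'));
    }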
diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/bindgen/codegen/postprocessing/merge_extern_blocks.rs libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/bindgen/codegen/postprocessing/merge_extern_blocks.rs
--- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/bindgen/codegen/postprocessing/merge_extern_blocks.rs	1970-01-01 00:00:00.000000000 +0000
+++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/bindgen/codegen/postprocessing/merge_extern_blocks.rs	2023-08-25 21:18:50.000000000 +0000
@@ -0,0 +1,72 @@
+use syn::{
+    visit_mut::{visit_file_mut, visit_item_mod_mut, VisitMut},
+    File, Item, ItemForeignMod, ItemMod,
+};
+
+pub(super) fn merge_extern_blocks(file: &mut File) {
+    Visitor.visit_file_mut(file)
+}
+
+struct Visitor;
+
+impl VisitMut for Visitor {
+    fn visit_file_mut(&mut self, file: &mut File) {
+        visit_items(&mut file.items);
+        visit_file_mut(self, file)
+    }
+
+    fn visit_item_mod_mut(&mut self, item_mod: &mut ItemMod) {
+        if let Some((_, ref mut items)) = item_mod.content {
+            visit_items(items);
+        }
+        visit_item_mod_mut(self, item_mod)
+    }
+}
+
+fn visit_items(items: &mut Vec<Item>) {
+    // Keep all the extern blocks in a different `Vec` for faster search.
+    let mut extern_blocks = Vec::<ItemForeignMod>::new();
+
+    for item in std::mem::take(items) {
+        if let Item::ForeignMod(ItemForeignMod {
+            attrs,
+            abi,
+            brace_token,
+            unsafety,
+            items: extern_block_items,
+        }) = item
+        {
+            let mut exists = false;
+            for extern_block in &mut extern_blocks {
+                // Check if there is an extern block with the same ABI and
+                // attributes.
+                if extern_block.attrs == attrs && extern_block.abi == abi {
+                    // Merge the items of the two blocks.
+                    extern_block.items.extend_from_slice(&extern_block_items);
+                    exists = true;
+                    break;
+                }
+            }
+            // If no existing extern block had the same ABI and attributes,
+            // store it.
+            if !exists {
+                extern_blocks.push(ItemForeignMod {
+                    attrs,
+                    abi,
+                    brace_token,
+                    unsafety,
+                    items: extern_block_items,
+                });
+            }
+        } else {
+            // If the item is not an extern block, we don't have to do
+            // anything and just push it back.
+            items.push(item);
+        }
+    }
+
+    // Move all the extern blocks alongside the rest of the items.
+    for extern_block in extern_blocks {
+        items.push(Item::ForeignMod(extern_block));
+    }
+}
diff -Nru libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/bindgen/codegen/postprocessing/mod.rs libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/bindgen/codegen/postprocessing/mod.rs
--- libclamunrar-1.0.0/libclamav_rust/.cargo/vendor/bindgen/codegen/postprocessing/mod.rs	1970-01-01 00:00:00.000000000 +0000
+++ libclamunrar-1.0.3/libclamav_rust/.cargo/vendor/bindgen/codegen/postpr