Version in base suite: 1.0.1+dfsg-2 Base version: clamav_1.0.1+dfsg-2 Target version: clamav_1.0.2+dfsg-1~deb12u1 Base file: /srv/ftp-master.debian.org/ftp/pool/main/c/clamav/clamav_1.0.1+dfsg-2.dsc Target file: /srv/ftp-master.debian.org/policy/pool/main/c/clamav/clamav_1.0.2+dfsg-1~deb12u1.dsc .gitattributes | 3 .gitignore | 4 CMakeLists.txt | 2 Cargo.lock | 97 Jenkinsfile | 4 NEWS.md | 59 clamsubmit/CMakeLists.txt | 2 cmake/FindRust.cmake | 26 debian/.git-dpm | 14 debian/changelog | 13 debian/control | 3 debian/libclamav11.symbols | 432 debian/patches/Add-a-version-script-for-libclamav-and-libfreshclam.patch | 8 debian/patches/Add-an-option-to-avoid-setting-RPATH-on-unix-systems.patch | 8 debian/patches/Change-paths-in-sample-conf-file-to-match-Debian.patch | 2 debian/patches/Remove-bundled-tomfastmath-library.patch | 243 debian/patches/Use-either-system-s-tomfastmath-library-or-the-built.patch | 231 debian/patches/cargo-Remove-windows-referenfes.patch | 44 debian/patches/clamd_dont_depend_on_clamav_demon_socket.patch | 35 debian/patches/libclamav-Add-missing-symbols.patch | 2 debian/patches/libclamav-Sort-libclamav.map-and-libfreshclam.map.patch | 2 debian/patches/libclamav-Use-OpenSSL-BN-instead-tomfastmath.patch | 1132 ++ debian/patches/libclamav-pe-Use-endian-wrapper-in-more-places.patch | 2 debian/patches/series | 4 debian/rules | 6 debian/split-tarball.sh | 50 debian/upstream/signing-key.asc | 106 libclamav/autoit.c | 14 libclamav/bytecode_api.h | 1 libclamav/hfsplus.c | 5 libclamav/matcher-ac.c | 1 libclamav/matcher-bm.c | 1 libclamav/readdb.c | 8 libclamav/rtf.c | 6 libclamav/scanners.c | 21 libclamav/vba_extract.c | 14 libclamav/vba_extract.h | 2 libclamav/xlm_extract.c | 3 libclamav_rust/.cargo/vendor/aho-corasick/.cargo-checksum.json | 1 libclamav_rust/.cargo/vendor/aho-corasick/COPYING | 3 libclamav_rust/.cargo/vendor/aho-corasick/Cargo.toml | 50 libclamav_rust/.cargo/vendor/aho-corasick/DESIGN.md | 483 
libclamav_rust/.cargo/vendor/aho-corasick/LICENSE-MIT | 21 libclamav_rust/.cargo/vendor/aho-corasick/README.md | 187 libclamav_rust/.cargo/vendor/aho-corasick/UNLICENSE | 24 libclamav_rust/.cargo/vendor/aho-corasick/rustfmt.toml | 2 libclamav_rust/.cargo/vendor/aho-corasick/src/ahocorasick.rs | 2141 --- libclamav_rust/.cargo/vendor/aho-corasick/src/automaton.rs | 573 - libclamav_rust/.cargo/vendor/aho-corasick/src/buffer.rs | 132 libclamav_rust/.cargo/vendor/aho-corasick/src/byte_frequencies.rs | 258 libclamav_rust/.cargo/vendor/aho-corasick/src/classes.rs | 238 libclamav_rust/.cargo/vendor/aho-corasick/src/dfa.rs | 713 - libclamav_rust/.cargo/vendor/aho-corasick/src/error.rs | 101 libclamav_rust/.cargo/vendor/aho-corasick/src/lib.rs | 303 libclamav_rust/.cargo/vendor/aho-corasick/src/nfa.rs | 1214 -- libclamav_rust/.cargo/vendor/aho-corasick/src/packed/api.rs | 625 - libclamav_rust/.cargo/vendor/aho-corasick/src/packed/mod.rs | 117 libclamav_rust/.cargo/vendor/aho-corasick/src/packed/pattern.rs | 318 libclamav_rust/.cargo/vendor/aho-corasick/src/packed/rabinkarp.rs | 185 libclamav_rust/.cargo/vendor/aho-corasick/src/packed/teddy/README.md | 386 libclamav_rust/.cargo/vendor/aho-corasick/src/packed/teddy/compile.rs | 414 libclamav_rust/.cargo/vendor/aho-corasick/src/packed/teddy/mod.rs | 62 libclamav_rust/.cargo/vendor/aho-corasick/src/packed/teddy/runtime.rs | 1204 -- libclamav_rust/.cargo/vendor/aho-corasick/src/packed/tests.rs | 568 - libclamav_rust/.cargo/vendor/aho-corasick/src/packed/vector.rs | 181 libclamav_rust/.cargo/vendor/aho-corasick/src/prefilter.rs | 1057 - libclamav_rust/.cargo/vendor/aho-corasick/src/state_id.rs | 192 libclamav_rust/.cargo/vendor/aho-corasick/src/tests.rs | 1254 -- libclamav_rust/.cargo/vendor/bindgen/.cargo-checksum.json | 2 libclamav_rust/.cargo/vendor/bindgen/Cargo.lock | 446 libclamav_rust/.cargo/vendor/bindgen/Cargo.toml | 89 libclamav_rust/.cargo/vendor/bindgen/README.md | 83 libclamav_rust/.cargo/vendor/bindgen/build.rs | 83 
libclamav_rust/.cargo/vendor/bindgen/callbacks.rs | 178 libclamav_rust/.cargo/vendor/bindgen/clang.rs | 2236 +++ libclamav_rust/.cargo/vendor/bindgen/codegen/bitfield_unit.rs | 102 libclamav_rust/.cargo/vendor/bindgen/codegen/bitfield_unit_tests.rs | 260 libclamav_rust/.cargo/vendor/bindgen/codegen/dyngen.rs | 201 libclamav_rust/.cargo/vendor/bindgen/codegen/error.rs | 33 libclamav_rust/.cargo/vendor/bindgen/codegen/helpers.rs | 322 libclamav_rust/.cargo/vendor/bindgen/codegen/impl_debug.rs | 245 libclamav_rust/.cargo/vendor/bindgen/codegen/impl_partialeq.rs | 142 libclamav_rust/.cargo/vendor/bindgen/codegen/mod.rs | 5366 +++++++++ libclamav_rust/.cargo/vendor/bindgen/codegen/postprocessing/merge_extern_blocks.rs | 72 libclamav_rust/.cargo/vendor/bindgen/codegen/postprocessing/mod.rs | 57 libclamav_rust/.cargo/vendor/bindgen/codegen/postprocessing/sort_semantically.rs | 46 libclamav_rust/.cargo/vendor/bindgen/codegen/serialize.rs | 358 libclamav_rust/.cargo/vendor/bindgen/codegen/struct_layout.rs | 444 libclamav_rust/.cargo/vendor/bindgen/csmith-fuzzing/README.md | 65 libclamav_rust/.cargo/vendor/bindgen/deps.rs | 20 libclamav_rust/.cargo/vendor/bindgen/diagnostics.rs | 189 libclamav_rust/.cargo/vendor/bindgen/extra_assertions.rs | 34 libclamav_rust/.cargo/vendor/bindgen/features.rs | 323 libclamav_rust/.cargo/vendor/bindgen/ir/analysis/derive.rs | 732 + libclamav_rust/.cargo/vendor/bindgen/ir/analysis/has_destructor.rs | 176 libclamav_rust/.cargo/vendor/bindgen/ir/analysis/has_float.rs | 252 libclamav_rust/.cargo/vendor/bindgen/ir/analysis/has_type_param_in_array.rs | 252 libclamav_rust/.cargo/vendor/bindgen/ir/analysis/has_vtable.rs | 240 libclamav_rust/.cargo/vendor/bindgen/ir/analysis/mod.rs | 407 libclamav_rust/.cargo/vendor/bindgen/ir/analysis/sizedness.rs | 361 libclamav_rust/.cargo/vendor/bindgen/ir/analysis/template_params.rs | 607 + libclamav_rust/.cargo/vendor/bindgen/ir/annotations.rs | 256 libclamav_rust/.cargo/vendor/bindgen/ir/comment.rs | 100 
libclamav_rust/.cargo/vendor/bindgen/ir/comp.rs | 1875 +++ libclamav_rust/.cargo/vendor/bindgen/ir/context.rs | 2981 +++++ libclamav_rust/.cargo/vendor/bindgen/ir/derive.rs | 135 libclamav_rust/.cargo/vendor/bindgen/ir/dot.rs | 86 libclamav_rust/.cargo/vendor/bindgen/ir/enum_ty.rs | 323 libclamav_rust/.cargo/vendor/bindgen/ir/function.rs | 787 + libclamav_rust/.cargo/vendor/bindgen/ir/int.rs | 127 libclamav_rust/.cargo/vendor/bindgen/ir/item.rs | 2026 +++ libclamav_rust/.cargo/vendor/bindgen/ir/item_kind.rs | 135 libclamav_rust/.cargo/vendor/bindgen/ir/layout.rs | 136 libclamav_rust/.cargo/vendor/bindgen/ir/mod.rs | 25 libclamav_rust/.cargo/vendor/bindgen/ir/module.rs | 95 libclamav_rust/.cargo/vendor/bindgen/ir/objc.rs | 335 libclamav_rust/.cargo/vendor/bindgen/ir/template.rs | 342 libclamav_rust/.cargo/vendor/bindgen/ir/traversal.rs | 479 libclamav_rust/.cargo/vendor/bindgen/ir/ty.rs | 1273 ++ libclamav_rust/.cargo/vendor/bindgen/ir/var.rs | 488 libclamav_rust/.cargo/vendor/bindgen/lib.rs | 1300 ++ libclamav_rust/.cargo/vendor/bindgen/log_stubs.rs | 32 libclamav_rust/.cargo/vendor/bindgen/options/as_args.rs | 52 libclamav_rust/.cargo/vendor/bindgen/options/helpers.rs | 43 libclamav_rust/.cargo/vendor/bindgen/options/mod.rs | 2008 +++ libclamav_rust/.cargo/vendor/bindgen/parse.rs | 41 libclamav_rust/.cargo/vendor/bindgen/regex_set.rs | 204 libclamav_rust/.cargo/vendor/bindgen/src/callbacks.rs | 106 libclamav_rust/.cargo/vendor/bindgen/src/clang.rs | 2093 --- libclamav_rust/.cargo/vendor/bindgen/src/codegen/bitfield_unit.rs | 102 libclamav_rust/.cargo/vendor/bindgen/src/codegen/bitfield_unit_tests.rs | 260 libclamav_rust/.cargo/vendor/bindgen/src/codegen/dyngen.rs | 178 libclamav_rust/.cargo/vendor/bindgen/src/codegen/error.rs | 33 libclamav_rust/.cargo/vendor/bindgen/src/codegen/helpers.rs | 299 libclamav_rust/.cargo/vendor/bindgen/src/codegen/impl_debug.rs | 245 libclamav_rust/.cargo/vendor/bindgen/src/codegen/impl_partialeq.rs | 142 
libclamav_rust/.cargo/vendor/bindgen/src/codegen/mod.rs | 4835 -------- libclamav_rust/.cargo/vendor/bindgen/src/codegen/struct_layout.rs | 438 libclamav_rust/.cargo/vendor/bindgen/src/deps.rs | 20 libclamav_rust/.cargo/vendor/bindgen/src/extra_assertions.rs | 34 libclamav_rust/.cargo/vendor/bindgen/src/features.rs | 302 libclamav_rust/.cargo/vendor/bindgen/src/ir/analysis/derive.rs | 732 - libclamav_rust/.cargo/vendor/bindgen/src/ir/analysis/has_destructor.rs | 176 libclamav_rust/.cargo/vendor/bindgen/src/ir/analysis/has_float.rs | 252 libclamav_rust/.cargo/vendor/bindgen/src/ir/analysis/has_type_param_in_array.rs | 252 libclamav_rust/.cargo/vendor/bindgen/src/ir/analysis/has_vtable.rs | 240 libclamav_rust/.cargo/vendor/bindgen/src/ir/analysis/mod.rs | 398 libclamav_rust/.cargo/vendor/bindgen/src/ir/analysis/sizedness.rs | 361 libclamav_rust/.cargo/vendor/bindgen/src/ir/analysis/template_params.rs | 608 - libclamav_rust/.cargo/vendor/bindgen/src/ir/annotations.rs | 211 libclamav_rust/.cargo/vendor/bindgen/src/ir/comment.rs | 119 libclamav_rust/.cargo/vendor/bindgen/src/ir/comp.rs | 1854 --- libclamav_rust/.cargo/vendor/bindgen/src/ir/context.rs | 2835 ----- libclamav_rust/.cargo/vendor/bindgen/src/ir/derive.rs | 135 libclamav_rust/.cargo/vendor/bindgen/src/ir/dot.rs | 86 libclamav_rust/.cargo/vendor/bindgen/src/ir/enum_ty.rs | 305 libclamav_rust/.cargo/vendor/bindgen/src/ir/function.rs | 652 - libclamav_rust/.cargo/vendor/bindgen/src/ir/int.rs | 127 libclamav_rust/.cargo/vendor/bindgen/src/ir/item.rs | 2008 --- libclamav_rust/.cargo/vendor/bindgen/src/ir/item_kind.rs | 147 libclamav_rust/.cargo/vendor/bindgen/src/ir/layout.rs | 143 libclamav_rust/.cargo/vendor/bindgen/src/ir/mod.rs | 24 libclamav_rust/.cargo/vendor/bindgen/src/ir/module.rs | 95 libclamav_rust/.cargo/vendor/bindgen/src/ir/objc.rs | 329 libclamav_rust/.cargo/vendor/bindgen/src/ir/template.rs | 343 libclamav_rust/.cargo/vendor/bindgen/src/ir/traversal.rs | 508 
libclamav_rust/.cargo/vendor/bindgen/src/ir/ty.rs | 1250 -- libclamav_rust/.cargo/vendor/bindgen/src/ir/var.rs | 455 libclamav_rust/.cargo/vendor/bindgen/src/lib.rs | 2729 ---- libclamav_rust/.cargo/vendor/bindgen/src/log_stubs.rs | 32 libclamav_rust/.cargo/vendor/bindgen/src/main.rs | 113 libclamav_rust/.cargo/vendor/bindgen/src/options.rs | 1000 - libclamav_rust/.cargo/vendor/bindgen/src/parse.rs | 102 libclamav_rust/.cargo/vendor/bindgen/src/regex_set.rs | 92 libclamav_rust/.cargo/vendor/bindgen/src/time.rs | 52 libclamav_rust/.cargo/vendor/bindgen/time.rs | 52 libclamav_rust/.cargo/vendor/env_logger/.cargo-checksum.json | 1 libclamav_rust/.cargo/vendor/env_logger/CHANGELOG.md | 3 libclamav_rust/.cargo/vendor/env_logger/Cargo.toml | 85 libclamav_rust/.cargo/vendor/env_logger/LICENSE-APACHE | 201 libclamav_rust/.cargo/vendor/env_logger/LICENSE-MIT | 23 libclamav_rust/.cargo/vendor/env_logger/README.md | 183 libclamav_rust/.cargo/vendor/env_logger/src/filter/mod.rs | 868 - libclamav_rust/.cargo/vendor/env_logger/src/filter/regex.rs | 29 libclamav_rust/.cargo/vendor/env_logger/src/filter/string.rs | 24 libclamav_rust/.cargo/vendor/env_logger/src/fmt/humantime/extern_impl.rs | 118 libclamav_rust/.cargo/vendor/env_logger/src/fmt/humantime/mod.rs | 11 libclamav_rust/.cargo/vendor/env_logger/src/fmt/humantime/shim_impl.rs | 5 libclamav_rust/.cargo/vendor/env_logger/src/fmt/mod.rs | 652 - libclamav_rust/.cargo/vendor/env_logger/src/fmt/writer/atty.rs | 32 libclamav_rust/.cargo/vendor/env_logger/src/fmt/writer/mod.rs | 252 libclamav_rust/.cargo/vendor/env_logger/src/fmt/writer/termcolor/extern_impl.rs | 532 libclamav_rust/.cargo/vendor/env_logger/src/fmt/writer/termcolor/mod.rs | 12 libclamav_rust/.cargo/vendor/env_logger/src/fmt/writer/termcolor/shim_impl.rs | 72 libclamav_rust/.cargo/vendor/env_logger/src/lib.rs | 1311 -- libclamav_rust/.cargo/vendor/env_logger/tests/init-twice-retains-filter.rs | 40 libclamav_rust/.cargo/vendor/env_logger/tests/log-in-log.rs | 39 
libclamav_rust/.cargo/vendor/env_logger/tests/log_tls_dtors.rs | 66 libclamav_rust/.cargo/vendor/env_logger/tests/regexp_filter.rs | 57 libclamav_rust/.cargo/vendor/humantime/.cargo-checksum.json | 1 libclamav_rust/.cargo/vendor/humantime/Cargo.toml | 37 libclamav_rust/.cargo/vendor/humantime/LICENSE-APACHE | 202 libclamav_rust/.cargo/vendor/humantime/LICENSE-MIT | 26 libclamav_rust/.cargo/vendor/humantime/README.md | 68 libclamav_rust/.cargo/vendor/humantime/benches/datetime_format.rs | 56 libclamav_rust/.cargo/vendor/humantime/benches/datetime_parse.rs | 47 libclamav_rust/.cargo/vendor/humantime/bulk.yaml | 8 libclamav_rust/.cargo/vendor/humantime/src/date.rs | 623 - libclamav_rust/.cargo/vendor/humantime/src/duration.rs | 456 libclamav_rust/.cargo/vendor/humantime/src/lib.rs | 34 libclamav_rust/.cargo/vendor/humantime/src/wrapper.rs | 107 libclamav_rust/.cargo/vendor/humantime/vagga.yaml | 92 libclamav_rust/.cargo/vendor/prettyplease/.cargo-checksum.json | 1 libclamav_rust/.cargo/vendor/prettyplease/Cargo.toml | 50 libclamav_rust/.cargo/vendor/prettyplease/LICENSE-APACHE | 176 libclamav_rust/.cargo/vendor/prettyplease/LICENSE-MIT | 23 libclamav_rust/.cargo/vendor/prettyplease/README.md | 312 libclamav_rust/.cargo/vendor/prettyplease/build.rs | 5 libclamav_rust/.cargo/vendor/prettyplease/examples/input.rs | 1 libclamav_rust/.cargo/vendor/prettyplease/examples/output.prettyplease.rs | 593 + libclamav_rust/.cargo/vendor/prettyplease/examples/output.rustc.rs | 508 libclamav_rust/.cargo/vendor/prettyplease/examples/output.rustfmt.rs | 552 libclamav_rust/.cargo/vendor/prettyplease/src/algorithm.rs | 376 libclamav_rust/.cargo/vendor/prettyplease/src/attr.rs | 287 libclamav_rust/.cargo/vendor/prettyplease/src/convenience.rs | 98 libclamav_rust/.cargo/vendor/prettyplease/src/data.rs | 78 libclamav_rust/.cargo/vendor/prettyplease/src/expr.rs | 1160 ++ libclamav_rust/.cargo/vendor/prettyplease/src/file.rs | 17 libclamav_rust/.cargo/vendor/prettyplease/src/generics.rs | 325 
libclamav_rust/.cargo/vendor/prettyplease/src/item.rs | 1646 ++ libclamav_rust/.cargo/vendor/prettyplease/src/iter.rs | 46 libclamav_rust/.cargo/vendor/prettyplease/src/lib.rs | 379 libclamav_rust/.cargo/vendor/prettyplease/src/lifetime.rs | 9 libclamav_rust/.cargo/vendor/prettyplease/src/lit.rs | 52 libclamav_rust/.cargo/vendor/prettyplease/src/mac.rs | 219 libclamav_rust/.cargo/vendor/prettyplease/src/pat.rs | 246 libclamav_rust/.cargo/vendor/prettyplease/src/path.rs | 207 libclamav_rust/.cargo/vendor/prettyplease/src/ring.rs | 81 libclamav_rust/.cargo/vendor/prettyplease/src/stmt.rs | 209 libclamav_rust/.cargo/vendor/prettyplease/src/token.rs | 80 libclamav_rust/.cargo/vendor/prettyplease/src/ty.rs | 286 libclamav_rust/.cargo/vendor/proc-macro2/.cargo-checksum.json | 2 libclamav_rust/.cargo/vendor/proc-macro2/Cargo.toml | 10 libclamav_rust/.cargo/vendor/proc-macro2/README.md | 2 libclamav_rust/.cargo/vendor/proc-macro2/build.rs | 74 libclamav_rust/.cargo/vendor/proc-macro2/rust-toolchain.toml | 2 libclamav_rust/.cargo/vendor/proc-macro2/src/extra.rs | 84 libclamav_rust/.cargo/vendor/proc-macro2/src/fallback.rs | 268 libclamav_rust/.cargo/vendor/proc-macro2/src/lib.rs | 76 libclamav_rust/.cargo/vendor/proc-macro2/src/marker.rs | 4 libclamav_rust/.cargo/vendor/proc-macro2/src/parse.rs | 305 libclamav_rust/.cargo/vendor/proc-macro2/src/rcvec.rs | 9 libclamav_rust/.cargo/vendor/proc-macro2/src/wrapper.rs | 111 libclamav_rust/.cargo/vendor/proc-macro2/tests/marker.rs | 3 libclamav_rust/.cargo/vendor/proc-macro2/tests/test.rs | 116 libclamav_rust/.cargo/vendor/proc-macro2/tests/test_fmt.rs | 2 libclamav_rust/.cargo/vendor/proc-macro2/tests/test_size.rs | 42 libclamav_rust/.cargo/vendor/quote/.cargo-checksum.json | 2 libclamav_rust/.cargo/vendor/quote/Cargo.toml | 4 libclamav_rust/.cargo/vendor/quote/LICENSE-APACHE | 25 libclamav_rust/.cargo/vendor/quote/src/lib.rs | 12 libclamav_rust/.cargo/vendor/quote/src/runtime.rs | 60 
libclamav_rust/.cargo/vendor/quote/src/spanned.rs | 21 libclamav_rust/.cargo/vendor/quote/tests/test.rs | 32 libclamav_rust/.cargo/vendor/quote/tests/ui/does-not-have-iter-interpolated-dup.stderr | 2 libclamav_rust/.cargo/vendor/quote/tests/ui/does-not-have-iter-interpolated.stderr | 2 libclamav_rust/.cargo/vendor/quote/tests/ui/does-not-have-iter-separated.stderr | 2 libclamav_rust/.cargo/vendor/quote/tests/ui/does-not-have-iter.stderr | 2 libclamav_rust/.cargo/vendor/quote/tests/ui/not-quotable.rs | 2 libclamav_rust/.cargo/vendor/quote/tests/ui/not-quotable.stderr | 12 libclamav_rust/.cargo/vendor/quote/tests/ui/not-repeatable.rs | 2 libclamav_rust/.cargo/vendor/quote/tests/ui/not-repeatable.stderr | 6 libclamav_rust/.cargo/vendor/quote/tests/ui/wrong-type-span.stderr | 10 libclamav_rust/.cargo/vendor/syn-1.0.107/.cargo-checksum.json | 1 libclamav_rust/.cargo/vendor/syn-1.0.107/Cargo.toml | 147 libclamav_rust/.cargo/vendor/syn-1.0.107/LICENSE-APACHE | 201 libclamav_rust/.cargo/vendor/syn-1.0.107/LICENSE-MIT | 23 libclamav_rust/.cargo/vendor/syn-1.0.107/README.md | 285 libclamav_rust/.cargo/vendor/syn-1.0.107/benches/file.rs | 55 libclamav_rust/.cargo/vendor/syn-1.0.107/benches/rust.rs | 170 libclamav_rust/.cargo/vendor/syn-1.0.107/build.rs | 51 libclamav_rust/.cargo/vendor/syn-1.0.107/src/attr.rs | 662 + libclamav_rust/.cargo/vendor/syn-1.0.107/src/await.rs | 2 libclamav_rust/.cargo/vendor/syn-1.0.107/src/bigint.rs | 66 libclamav_rust/.cargo/vendor/syn-1.0.107/src/buffer.rs | 398 libclamav_rust/.cargo/vendor/syn-1.0.107/src/custom_keyword.rs | 253 libclamav_rust/.cargo/vendor/syn-1.0.107/src/custom_punctuation.rs | 300 libclamav_rust/.cargo/vendor/syn-1.0.107/src/data.rs | 493 libclamav_rust/.cargo/vendor/syn-1.0.107/src/derive.rs | 274 libclamav_rust/.cargo/vendor/syn-1.0.107/src/discouraged.rs | 194 libclamav_rust/.cargo/vendor/syn-1.0.107/src/drops.rs | 58 libclamav_rust/.cargo/vendor/syn-1.0.107/src/error.rs | 428 
libclamav_rust/.cargo/vendor/syn-1.0.107/src/export.rs | 39 libclamav_rust/.cargo/vendor/syn-1.0.107/src/expr.rs | 3558 ++++++ libclamav_rust/.cargo/vendor/syn-1.0.107/src/ext.rs | 139 libclamav_rust/.cargo/vendor/syn-1.0.107/src/file.rs | 125 libclamav_rust/.cargo/vendor/syn-1.0.107/src/gen/clone.rs | 2241 +++ libclamav_rust/.cargo/vendor/syn-1.0.107/src/gen/debug.rs | 3042 +++++ libclamav_rust/.cargo/vendor/syn-1.0.107/src/gen/eq.rs | 2195 +++ libclamav_rust/.cargo/vendor/syn-1.0.107/src/gen/fold.rs | 3341 +++++ libclamav_rust/.cargo/vendor/syn-1.0.107/src/gen/hash.rs | 2869 +++++ libclamav_rust/.cargo/vendor/syn-1.0.107/src/gen/visit.rs | 3786 ++++++ libclamav_rust/.cargo/vendor/syn-1.0.107/src/gen/visit_mut.rs | 3786 ++++++ libclamav_rust/.cargo/vendor/syn-1.0.107/src/gen_helper.rs | 154 libclamav_rust/.cargo/vendor/syn-1.0.107/src/generics.rs | 1362 ++ libclamav_rust/.cargo/vendor/syn-1.0.107/src/group.rs | 282 libclamav_rust/.cargo/vendor/syn-1.0.107/src/ident.rs | 101 libclamav_rust/.cargo/vendor/syn-1.0.107/src/item.rs | 3313 +++++ libclamav_rust/.cargo/vendor/syn-1.0.107/src/lib.rs | 984 + libclamav_rust/.cargo/vendor/syn-1.0.107/src/lifetime.rs | 154 libclamav_rust/.cargo/vendor/syn-1.0.107/src/lit.rs | 1600 ++ libclamav_rust/.cargo/vendor/syn-1.0.107/src/lookahead.rs | 169 libclamav_rust/.cargo/vendor/syn-1.0.107/src/mac.rs | 219 libclamav_rust/.cargo/vendor/syn-1.0.107/src/macros.rs | 177 libclamav_rust/.cargo/vendor/syn-1.0.107/src/op.rs | 234 libclamav_rust/.cargo/vendor/syn-1.0.107/src/parse.rs | 1314 ++ libclamav_rust/.cargo/vendor/syn-1.0.107/src/parse_macro_input.rs | 179 libclamav_rust/.cargo/vendor/syn-1.0.107/src/parse_quote.rs | 167 libclamav_rust/.cargo/vendor/syn-1.0.107/src/pat.rs | 927 + libclamav_rust/.cargo/vendor/syn-1.0.107/src/path.rs | 854 + libclamav_rust/.cargo/vendor/syn-1.0.107/src/print.rs | 16 libclamav_rust/.cargo/vendor/syn-1.0.107/src/punctuated.rs | 1087 + libclamav_rust/.cargo/vendor/syn-1.0.107/src/reserved.rs | 44 
libclamav_rust/.cargo/vendor/syn-1.0.107/src/sealed.rs | 4 libclamav_rust/.cargo/vendor/syn-1.0.107/src/span.rs | 67 libclamav_rust/.cargo/vendor/syn-1.0.107/src/spanned.rs | 114 libclamav_rust/.cargo/vendor/syn-1.0.107/src/stmt.rs | 349 libclamav_rust/.cargo/vendor/syn-1.0.107/src/thread.rs | 41 libclamav_rust/.cargo/vendor/syn-1.0.107/src/token.rs | 1013 + libclamav_rust/.cargo/vendor/syn-1.0.107/src/tt.rs | 107 libclamav_rust/.cargo/vendor/syn-1.0.107/src/ty.rs | 1304 ++ libclamav_rust/.cargo/vendor/syn-1.0.107/src/verbatim.rs | 33 libclamav_rust/.cargo/vendor/syn-1.0.107/src/whitespace.rs | 65 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/common/eq.rs | 806 + libclamav_rust/.cargo/vendor/syn-1.0.107/tests/common/mod.rs | 28 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/common/parse.rs | 48 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/debug/gen.rs | 5640 ++++++++++ libclamav_rust/.cargo/vendor/syn-1.0.107/tests/debug/mod.rs | 125 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/macros/mod.rs | 79 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/regression.rs | 3 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/regression/issue1108.rs | 5 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/regression/issue1235.rs | 32 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/repo/mod.rs | 215 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/repo/progress.rs | 37 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_asyncness.rs | 37 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_attribute.rs | 336 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_derive_input.rs | 894 + libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_expr.rs | 306 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_generics.rs | 285 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_grouping.rs | 52 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_ident.rs | 85 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_item.rs | 336 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_iterators.rs | 68 
libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_lit.rs | 266 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_meta.rs | 378 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_parse_buffer.rs | 92 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_parse_stream.rs | 12 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_pat.rs | 67 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_path.rs | 126 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_precedence.rs | 460 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_receiver.rs | 127 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_round_trip.rs | 241 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_shebang.rs | 59 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_should_parse.rs | 45 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_size.rs | 29 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_stmt.rs | 93 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_token_trees.rs | 30 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_ty.rs | 352 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/test_visibility.rs | 148 libclamav_rust/.cargo/vendor/syn-1.0.107/tests/zzz_stable.rs | 33 libclamav_rust/.cargo/vendor/syn/.cargo-checksum.json | 2 libclamav_rust/.cargo/vendor/syn/Cargo.toml | 36 libclamav_rust/.cargo/vendor/syn/LICENSE-APACHE | 25 libclamav_rust/.cargo/vendor/syn/README.md | 9 libclamav_rust/.cargo/vendor/syn/benches/file.rs | 10 libclamav_rust/.cargo/vendor/syn/benches/rust.rs | 25 libclamav_rust/.cargo/vendor/syn/build.rs | 51 libclamav_rust/.cargo/vendor/syn/src/attr.rs | 656 - libclamav_rust/.cargo/vendor/syn/src/await.rs | 2 libclamav_rust/.cargo/vendor/syn/src/bigint.rs | 6 libclamav_rust/.cargo/vendor/syn/src/buffer.rs | 79 libclamav_rust/.cargo/vendor/syn/src/custom_keyword.rs | 44 libclamav_rust/.cargo/vendor/syn/src/custom_punctuation.rs | 30 libclamav_rust/.cargo/vendor/syn/src/data.rs | 211 libclamav_rust/.cargo/vendor/syn/src/derive.rs | 41 
libclamav_rust/.cargo/vendor/syn/src/discouraged.rs | 25 libclamav_rust/.cargo/vendor/syn/src/error.rs | 110 libclamav_rust/.cargo/vendor/syn/src/export.rs | 13 libclamav_rust/.cargo/vendor/syn/src/expr.rs | 1346 +- libclamav_rust/.cargo/vendor/syn/src/ext.rs | 8 libclamav_rust/.cargo/vendor/syn/src/file.rs | 4 libclamav_rust/.cargo/vendor/syn/src/gen/clone.rs | 405 libclamav_rust/.cargo/vendor/syn/src/gen/debug.rs | 2923 ++--- libclamav_rust/.cargo/vendor/syn/src/gen/eq.rs | 446 libclamav_rust/.cargo/vendor/syn/src/gen/fold.rs | 1204 -- libclamav_rust/.cargo/vendor/syn/src/gen/hash.rs | 558 libclamav_rust/.cargo/vendor/syn/src/gen/visit.rs | 1413 +- libclamav_rust/.cargo/vendor/syn/src/gen/visit_mut.rs | 1478 +- libclamav_rust/.cargo/vendor/syn/src/gen_helper.rs | 124 libclamav_rust/.cargo/vendor/syn/src/generics.rs | 387 libclamav_rust/.cargo/vendor/syn/src/group.rs | 11 libclamav_rust/.cargo/vendor/syn/src/ident.rs | 115 libclamav_rust/.cargo/vendor/syn/src/item.rs | 1324 +- libclamav_rust/.cargo/vendor/syn/src/lib.rs | 422 libclamav_rust/.cargo/vendor/syn/src/lifetime.rs | 2 libclamav_rust/.cargo/vendor/syn/src/lit.rs | 252 libclamav_rust/.cargo/vendor/syn/src/lookahead.rs | 8 libclamav_rust/.cargo/vendor/syn/src/mac.rs | 64 libclamav_rust/.cargo/vendor/syn/src/macros.rs | 9 libclamav_rust/.cargo/vendor/syn/src/meta.rs | 420 libclamav_rust/.cargo/vendor/syn/src/op.rs | 70 libclamav_rust/.cargo/vendor/syn/src/parse.rs | 102 libclamav_rust/.cargo/vendor/syn/src/parse_macro_input.rs | 57 libclamav_rust/.cargo/vendor/syn/src/parse_quote.rs | 27 libclamav_rust/.cargo/vendor/syn/src/pat.rs | 700 - libclamav_rust/.cargo/vendor/syn/src/path.rs | 467 libclamav_rust/.cargo/vendor/syn/src/print.rs | 2 libclamav_rust/.cargo/vendor/syn/src/punctuated.rs | 124 libclamav_rust/.cargo/vendor/syn/src/reserved.rs | 44 libclamav_rust/.cargo/vendor/syn/src/restriction.rs | 171 libclamav_rust/.cargo/vendor/syn/src/sealed.rs | 2 libclamav_rust/.cargo/vendor/syn/src/span.rs | 37 
libclamav_rust/.cargo/vendor/syn/src/spanned.rs | 15 libclamav_rust/.cargo/vendor/syn/src/stmt.rs | 246 libclamav_rust/.cargo/vendor/syn/src/thread.rs | 15 libclamav_rust/.cargo/vendor/syn/src/token.rs | 605 - libclamav_rust/.cargo/vendor/syn/src/tt.rs | 4 libclamav_rust/.cargo/vendor/syn/src/ty.rs | 408 libclamav_rust/.cargo/vendor/syn/src/verbatim.rs | 2 libclamav_rust/.cargo/vendor/syn/src/whitespace.rs | 2 libclamav_rust/.cargo/vendor/syn/tests/common/eq.rs | 105 libclamav_rust/.cargo/vendor/syn/tests/common/parse.rs | 5 libclamav_rust/.cargo/vendor/syn/tests/debug/gen.rs | 3841 ++---- libclamav_rust/.cargo/vendor/syn/tests/debug/mod.rs | 18 libclamav_rust/.cargo/vendor/syn/tests/regression.rs | 2 libclamav_rust/.cargo/vendor/syn/tests/regression/issue1108.rs | 2 libclamav_rust/.cargo/vendor/syn/tests/repo/mod.rs | 209 libclamav_rust/.cargo/vendor/syn/tests/test_asyncness.rs | 8 libclamav_rust/.cargo/vendor/syn/tests/test_attribute.rs | 171 libclamav_rust/.cargo/vendor/syn/tests/test_derive_input.rs | 277 libclamav_rust/.cargo/vendor/syn/tests/test_expr.rs | 64 libclamav_rust/.cargo/vendor/syn/tests/test_generics.rs | 69 libclamav_rust/.cargo/vendor/syn/tests/test_grouping.rs | 9 libclamav_rust/.cargo/vendor/syn/tests/test_item.rs | 54 libclamav_rust/.cargo/vendor/syn/tests/test_iterators.rs | 2 libclamav_rust/.cargo/vendor/syn/tests/test_lit.rs | 11 libclamav_rust/.cargo/vendor/syn/tests/test_meta.rs | 271 libclamav_rust/.cargo/vendor/syn/tests/test_parse_buffer.rs | 1 libclamav_rust/.cargo/vendor/syn/tests/test_parse_stream.rs | 10 libclamav_rust/.cargo/vendor/syn/tests/test_pat.rs | 52 libclamav_rust/.cargo/vendor/syn/tests/test_path.rs | 38 libclamav_rust/.cargo/vendor/syn/tests/test_precedence.rs | 241 libclamav_rust/.cargo/vendor/syn/tests/test_receiver.rs | 328 libclamav_rust/.cargo/vendor/syn/tests/test_round_trip.rs | 40 libclamav_rust/.cargo/vendor/syn/tests/test_shebang.rs | 30 libclamav_rust/.cargo/vendor/syn/tests/test_size.rs | 15 
libclamav_rust/.cargo/vendor/syn/tests/test_stmt.rs | 187 libclamav_rust/.cargo/vendor/syn/tests/test_token_trees.rs | 2 libclamav_rust/.cargo/vendor/syn/tests/test_ty.rs | 53 libclamav_rust/.cargo/vendor/syn/tests/test_visibility.rs | 12 libclamav_rust/.cargo/vendor/termcolor/.cargo-checksum.json | 1 libclamav_rust/.cargo/vendor/termcolor/COPYING | 3 libclamav_rust/.cargo/vendor/termcolor/Cargo.toml | 40 libclamav_rust/.cargo/vendor/termcolor/LICENSE-MIT | 21 libclamav_rust/.cargo/vendor/termcolor/README.md | 115 libclamav_rust/.cargo/vendor/termcolor/UNLICENSE | 24 libclamav_rust/.cargo/vendor/termcolor/rustfmt.toml | 2 libclamav_rust/.cargo/vendor/termcolor/src/lib.rs | 2350 ---- libclamav_rust/Cargo.toml | 2 libclamav_rust/build.rs | 6 libclamav_rust/src/cdiff.rs | 114 libclamav_rust/src/evidence.rs | 6 libclamav_rust/src/fuzzy_hash.rs | 2 libclamav_rust/src/logging.rs | 6 libclamav_rust/src/sys.rs | 332 libfreshclam/dns.c | 6 libfreshclam/libfreshclam.c | 7 libfreshclam/libfreshclam_internal.c | 44 sigtool/sigtool.c | 6 unit_tests/clamscan/container_sigs_test.py | 4 unit_tests/clamscan/fp_check_test.py | 4 unit_tests/clamscan/heuristics_test.py | 2 unit_tests/freshclam_test.py | 34 unit_tests/sigtool_test.py | 78 484 files changed, 108970 insertions(+), 64684 deletions(-) diff -Nru clamav-1.0.1+dfsg/.gitattributes clamav-1.0.2+dfsg/.gitattributes --- clamav-1.0.1+dfsg/.gitattributes 2023-02-13 06:00:26.000000000 +0000 +++ clamav-1.0.2+dfsg/.gitattributes 2023-08-15 22:24:07.000000000 +0000 @@ -9,6 +9,9 @@ # Files that should be left untouched (binary is macro for -text -diff) *.ref binary +# Preserve signature for .cargo/vendor files (from the tarabll) ++/.cargo/vendor binary + # # Exclude files from exporting # diff -Nru clamav-1.0.1+dfsg/.gitignore clamav-1.0.2+dfsg/.gitignore --- clamav-1.0.1+dfsg/.gitignore 2023-02-13 06:00:26.000000000 +0000 +++ clamav-1.0.2+dfsg/.gitignore 2023-08-15 22:24:07.000000000 +0000 @@ -228,9 +228,5 @@ debug/ target/ -# Remove 
Cargo.lock from gitignore if creating an executable, leave it for libraries -# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html -Cargo.lock - # These are backup files generated by rustfmt **/*.rs.bk diff -Nru clamav-1.0.1+dfsg/CMakeLists.txt clamav-1.0.2+dfsg/CMakeLists.txt --- clamav-1.0.1+dfsg/CMakeLists.txt 2023-02-13 06:00:26.000000000 +0000 +++ clamav-1.0.2+dfsg/CMakeLists.txt 2023-08-15 22:24:07.000000000 +0000 @@ -22,7 +22,7 @@ set(VERSION_SUFFIX "") project( ClamAV - VERSION "1.0.1" + VERSION "1.0.2" DESCRIPTION "ClamAV open source email, web, and end-point anti-virus toolkit." ) set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH}) diff -Nru clamav-1.0.1+dfsg/Cargo.lock clamav-1.0.2+dfsg/Cargo.lock --- clamav-1.0.1+dfsg/Cargo.lock 2023-02-13 06:00:35.000000000 +0000 +++ clamav-1.0.2+dfsg/Cargo.lock 2023-08-15 22:24:07.000000000 +0000 @@ -9,15 +9,6 @@ checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" [[package]] -name = "aho-corasick" -version = "0.7.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac" -dependencies = [ - "memchr", -] - -[[package]] name = "ansi_term" version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -45,24 +36,24 @@ [[package]] name = "bindgen" -version = "0.59.2" +version = "0.65.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bd2a9a458e8f4304c52c43ebb0cfbd520289f8379a52e329a38afda99bf8eb8" +checksum = "cfdf7b466f9a4903edc73f95d6d2bcd5baf8ae620638762244d3f60143643cc5" dependencies = [ "bitflags", "cexpr", "clang-sys", - "clap", - "env_logger", "lazy_static", "lazycell", "log", "peeking_take_while", + "prettyplease", "proc-macro2", "quote", "regex", "rustc-hash", "shlex", + "syn 2.0.15", "which", ] @@ -119,7 +110,7 @@ "quote", "serde", "serde_json", - "syn", + "syn 1.0.107", 
"tempfile", "toml", ] @@ -284,19 +275,6 @@ checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" [[package]] -name = "env_logger" -version = "0.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a12e6657c4c97ebab115a42dcee77225f7f482cdd841cf7088c657a42e9e00e7" -dependencies = [ - "atty", - "humantime", - "log", - "regex", - "termcolor", -] - -[[package]] name = "exr" version = "1.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -444,12 +422,6 @@ checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" [[package]] -name = "humantime" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" - -[[package]] name = "image" version = "0.24.5" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -691,7 +663,7 @@ dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.107", ] [[package]] @@ -707,6 +679,16 @@ ] [[package]] +name = "prettyplease" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ceca8aaf45b5c46ec7ed39fff75f57290368c1846d33d24a122ca81416ab058" +dependencies = [ + "proc-macro2", + "syn 2.0.15", +] + +[[package]] name = "primal-check" version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -717,18 +699,18 @@ [[package]] name = "proc-macro2" -version = "1.0.51" +version = "1.0.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d727cae5b39d21da60fa540906919ad737832fe0b1c165da3a34d6548c849d6" +checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.23" +version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b" +checksum = 
"4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc" dependencies = [ "proc-macro2", ] @@ -770,8 +752,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48aaa5748ba571fb95cd2c85c09f629215d3a6ece942baa100950af03a34f733" dependencies = [ - "aho-corasick", - "memchr", "regex-syntax", ] @@ -855,7 +835,7 @@ dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.107", ] [[package]] @@ -931,6 +911,17 @@ ] [[package]] +name = "syn" +version = "2.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a34fcf3e8b60f57e6a14301a2e916d323af98b0ea63c599441eec8558660c822" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] name = "tempfile" version = "3.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -945,15 +936,6 @@ ] [[package]] -name = "termcolor" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be55cf8942feac5c765c2c993422806843c9a9a45d4d5c407ad6dd2ea95eb9b6" -dependencies = [ - "winapi-util", -] - -[[package]] name = "textwrap" version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -979,7 +961,7 @@ dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.107", ] [[package]] @@ -1084,7 +1066,7 @@ "once_cell", "proc-macro2", "quote", - "syn", + "syn 1.0.107", "wasm-bindgen-shared", ] @@ -1106,7 +1088,7 @@ dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.107", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -1151,15 +1133,6 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] -name = "winapi-util" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" -dependencies = [ - "winapi", -] - -[[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" diff -Nru clamav-1.0.1+dfsg/Jenkinsfile clamav-1.0.2+dfsg/Jenkinsfile --- clamav-1.0.1+dfsg/Jenkinsfile 2023-02-13 06:00:26.000000000 +0000 +++ clamav-1.0.2+dfsg/Jenkinsfile 2023-08-15 22:24:07.000000000 +0000 @@ -10,7 +10,7 @@ parameters( [ string(name: 'VERSION', - defaultValue: '1.0.1', + defaultValue: '1.0.2', description: 'ClamAV version string'), string(name: 'FRAMEWORK_BRANCH', defaultValue: '1.0', @@ -37,7 +37,7 @@ defaultValue: 'fuzz-regression-1.0', description: 'test-pipelines branch for fuzz regression tests'), string(name: 'FUZZ_CORPUS_BRANCH', - defaultValue: 'master', + defaultValue: '1.0', description: 'private-fuzz-corpus branch'), string(name: 'APPCHECK_PIPELINE', defaultValue: 'appcheck-1.0', diff -Nru clamav-1.0.1+dfsg/NEWS.md clamav-1.0.2+dfsg/NEWS.md --- clamav-1.0.1+dfsg/NEWS.md 2023-02-13 06:00:26.000000000 +0000 +++ clamav-1.0.2+dfsg/NEWS.md 2023-08-15 22:24:07.000000000 +0000 @@ -3,6 +3,65 @@ Note: This file refers to the official packages. Things described here may differ slightly from third-party binary packages. +## 1.0.2 + +ClamAV 1.0.2 is a critical patch release with the following fixes: + +- [CVE-2023-20197](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-20197) + Fixed a possible denial of service vulnerability in the HFS+ file parser. + This issue affects versions 1.1.0, 1.0.1 through 1.0.0, 0.105.2 through 0.105.0, + 0.104.4 through 0.104.0, and 0.103.8 through 0.103.0. + Thank you to Steve Smith for reporting this issue. + +- [CVE-2023-20212](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-20212) + Fixed a possible denial of service vulnerability in the AutoIt file parser. + This issue affects versions 1.0.1 and 1.0.0. + This issue does not affect version 1.1.0. + +- Fixed a build issue when using the Rust nightly toolchain, which was + affecting the oss-fuzz build environment used for regression tests. 
+ - GitHub pull request: https://github.com/Cisco-Talos/clamav/pull/996 + +- Fixed a build issue on Windows when using Rust version 1.70 or newer. + - GitHub pull request: https://github.com/Cisco-Talos/clamav/pull/993 + +- CMake build system improvement to support compiling with OpenSSL 3.x on + macOS with the Xcode toolchain. + + The official ClamAV installers and packages are now built with OpenSSL 3.1.1 + or newer. + - GitHub pull request: https://github.com/Cisco-Talos/clamav/pull/973 + +- Fixed an issue where ClamAV does not abort the signature load process after + partially loading an invalid signature. + The bug would later cause a crash when scanning certain files. + - GitHub pull request: https://github.com/Cisco-Talos/clamav/pull/952 + +- Fixed an issue so that ClamAV correctly removes temporary files generated + by the VBA and XLM extraction modules so that the files are not leaked in + patched versions of ClamAV where temporary files are written directly to the + temp-directory instead of writing to a unique subdirectory. + - GitHub pull request: https://github.com/Cisco-Talos/clamav/pull/900 + +- Set Git attributes to prevent Git from altering line endings for bundled Rust + libraries. Third-party Rust libraries are bundled in the ClamAV release + tarball. We do not commit them to our own Git repository, but community + package maintainers may now store the tarball contents in Git. + The Rust build system verifies the library manifest, and this change + ensures that the hashes are correct. + Improvement courtesy of Nicolas R. + - GitHub pull request: https://github.com/Cisco-Talos/clamav/pull/856 + +- Fixed two bugs that would cause Freshclam to fail update when applying a + CDIFF database patch if that patch adds a file to the database archive + or removes a file from the database archive. + This bug also caused Sigtool to fail to create such a patch. 
+ - GitHub pull request: https://github.com/Cisco-Talos/clamav/pull/901 + +Special thanks to the following people for code contributions and bug reports: +- Nicolas R. +- Steve Smith + ## 1.0.1 ClamAV 1.0.1 is a critical patch release with the following fixes: diff -Nru clamav-1.0.1+dfsg/clamsubmit/CMakeLists.txt clamav-1.0.2+dfsg/clamsubmit/CMakeLists.txt --- clamav-1.0.1+dfsg/clamsubmit/CMakeLists.txt 2023-02-13 06:00:26.000000000 +0000 +++ clamav-1.0.2+dfsg/clamsubmit/CMakeLists.txt 2023-08-15 22:24:07.000000000 +0000 @@ -34,6 +34,8 @@ PRIVATE ClamAV::libclamav ClamAV::common + OpenSSL::SSL + OpenSSL::Crypto JSONC::jsonc CURL::libcurl ) if(APPLE) diff -Nru clamav-1.0.1+dfsg/cmake/FindRust.cmake clamav-1.0.2+dfsg/cmake/FindRust.cmake --- clamav-1.0.1+dfsg/cmake/FindRust.cmake 2023-02-13 06:00:26.000000000 +0000 +++ clamav-1.0.2+dfsg/cmake/FindRust.cmake 2023-08-15 22:24:07.000000000 +0000 @@ -294,6 +294,21 @@ WORKING_DIRECTORY "${ARGS_SOURCE_DIRECTORY}" DEPENDS ${LIB_SOURCES} COMMENT "Building ${ARGS_TARGET} in ${ARGS_BINARY_DIRECTORY} with: ${cargo_EXECUTABLE} ${MY_CARGO_ARGS_STRING}") + elseif("${CMAKE_OSX_ARCHITECTURES}" MATCHES "^(arm64)$") + add_custom_command( + OUTPUT "${OUTPUT}" + COMMAND ${CMAKE_COMMAND} -E env "CARGO_CMD=build" "CARGO_TARGET_DIR=${ARGS_BINARY_DIRECTORY}" "MAINTAINER_MODE=${MAINTAINER_MODE}" "RUSTFLAGS=${RUSTFLAGS}" ${cargo_EXECUTABLE} ${MY_CARGO_ARGS} --target=aarch64-apple-darwin + WORKING_DIRECTORY "${ARGS_SOURCE_DIRECTORY}" + DEPENDS ${LIB_SOURCES} + COMMENT "Building ${ARGS_TARGET} in ${ARGS_BINARY_DIRECTORY} with: ${cargo_EXECUTABLE} ${MY_CARGO_ARGS_STRING}") + elseif("${CMAKE_OSX_ARCHITECTURES}" MATCHES "^(x86_64)$") + add_custom_command( + OUTPUT "${OUTPUT}" + COMMAND ${CMAKE_COMMAND} -E env "CARGO_CMD=build" "CARGO_TARGET_DIR=${ARGS_BINARY_DIRECTORY}" "MAINTAINER_MODE=${MAINTAINER_MODE}" "RUSTFLAGS=${RUSTFLAGS}" ${cargo_EXECUTABLE} ${MY_CARGO_ARGS} --target=x86_64-apple-darwin + COMMAND ${CMAKE_COMMAND} -E make_directory 
"${ARGS_BINARY_DIRECTORY}/${RUST_COMPILER_TARGET}/${CARGO_BUILD_TYPE}" + WORKING_DIRECTORY "${ARGS_SOURCE_DIRECTORY}" + DEPENDS ${LIB_SOURCES} + COMMENT "Building ${ARGS_TARGET} in ${ARGS_BINARY_DIRECTORY} with: ${cargo_EXECUTABLE} ${MY_CARGO_ARGS_STRING}") else() add_custom_command( OUTPUT "${OUTPUT}" @@ -382,10 +397,17 @@ ${rustc_VERSION} < ${RUSTC_MINIMUM_REQUIRED}") endif() +if(WIN32) + file(TOUCH ${CMAKE_BINARY_DIR}/empty-file) + set(EMPTY_FILE "${CMAKE_BINARY_DIR}/empty-file") +else() + set(EMPTY_FILE "/dev/null") +endif() + # Determine the native libs required to link w/ rust static libs -# message(STATUS "Detecting native static libs for rust: ${rustc_EXECUTABLE} --crate-type staticlib --print=native-static-libs /dev/null") +# message(STATUS "Detecting native static libs for rust: ${rustc_EXECUTABLE} --crate-type staticlib --print=native-static-libs ${EMPTY_FILE}") execute_process( - COMMAND ${CMAKE_COMMAND} -E env "CARGO_TARGET_DIR=${CMAKE_BINARY_DIR}" ${rustc_EXECUTABLE} --crate-type staticlib --print=native-static-libs /dev/null + COMMAND ${CMAKE_COMMAND} -E env "CARGO_TARGET_DIR=${CMAKE_BINARY_DIR}" ${rustc_EXECUTABLE} --crate-type staticlib --print=native-static-libs ${EMPTY_FILE} OUTPUT_VARIABLE RUST_NATIVE_STATIC_LIBS_OUTPUT ERROR_VARIABLE RUST_NATIVE_STATIC_LIBS_ERROR RESULT_VARIABLE RUST_NATIVE_STATIC_LIBS_RESULT diff -Nru clamav-1.0.1+dfsg/debian/.git-dpm clamav-1.0.2+dfsg/debian/.git-dpm --- clamav-1.0.1+dfsg/debian/.git-dpm 2023-02-26 16:35:36.000000000 +0000 +++ clamav-1.0.2+dfsg/debian/.git-dpm 2023-08-27 09:35:11.000000000 +0000 @@ -1,8 +1,8 @@ # see git-dpm(1) from git-dpm package -b1b7438ad627cb935c5e7b9923342bc2e26d4137 -b1b7438ad627cb935c5e7b9923342bc2e26d4137 -b0f7da741add13e9a19254cc0697931223f948eb -b0f7da741add13e9a19254cc0697931223f948eb -clamav_1.0.1.orig.tar.xz -fe18edded75204a2b4b4ec0c73c22da14e5235c2 -14132600 +de9cef7ab6e5a57247f9598340a0e64869429870 +de9cef7ab6e5a57247f9598340a0e64869429870 
+7b4b490a9f8c93c9ef66c8d34be648796dd9f7bd +7b4b490a9f8c93c9ef66c8d34be648796dd9f7bd +clamav_1.0.2+dfsg.orig.tar.xz +c845d2c777adda943e7421c601924e1bee1864a8 +14134372 diff -Nru clamav-1.0.1+dfsg/debian/changelog clamav-1.0.2+dfsg/debian/changelog --- clamav-1.0.1+dfsg/debian/changelog 2023-02-26 16:39:06.000000000 +0000 +++ clamav-1.0.2+dfsg/debian/changelog 2023-08-27 09:35:11.000000000 +0000 @@ -1,3 +1,16 @@ +clamav (1.0.2+dfsg-1~deb12u1) bookworm; urgency=medium + + * Import 1.0.2 (Closes: #1050057) + - CVE-2023-20197 (Possible DoS in HFS+ file parser). + - CVE-2023-20212 (Possible DoS in AutoIt file parser). + * Use cmake for xml2 detection (Closes: #949100). + * Replace tomsfastmath with OpenSSL's BN. + * Don't enable clamonacc by default (Closes: #1030171). + * Let the clamav-daemon.socket depend on the service file again + (Closes: #1044136). + + -- Sebastian Andrzej Siewior Sun, 27 Aug 2023 11:35:11 +0200 + clamav (1.0.1+dfsg-2) unstable; urgency=medium * Depend on latest libtfm1 (Closes: #1031896, #1027010). 
diff -Nru clamav-1.0.1+dfsg/debian/control clamav-1.0.2+dfsg/debian/control --- clamav-1.0.1+dfsg/debian/control 2023-02-26 16:35:50.000000000 +0000 +++ clamav-1.0.2+dfsg/debian/control 2023-08-27 09:35:11.000000000 +0000 @@ -26,7 +26,6 @@ libpcre2-dev, libssl-dev, libsystemd-dev [linux-any], - libtfm-dev, libxml2-dev, perl:native, pkg-config, @@ -137,7 +136,7 @@ Section: libs Architecture: any Multi-Arch: same -Depends: ${misc:Depends}, ${shlibs:Depends}, libtfm1 (> 0.13.1) +Depends: ${misc:Depends}, ${shlibs:Depends} Suggests: libclamunrar, libclamunrar11 Replaces: libclamav9 Description: anti-virus utility for Unix - library diff -Nru clamav-1.0.1+dfsg/debian/libclamav11.symbols clamav-1.0.2+dfsg/debian/libclamav11.symbols --- clamav-1.0.1+dfsg/debian/libclamav11.symbols 2023-02-26 16:35:36.000000000 +0000 +++ clamav-1.0.2+dfsg/debian/libclamav11.symbols 2023-08-27 09:35:11.000000000 +0000 @@ -1,25 +1,25 @@ libclamav.so.11 libclamav11 #MINVER# * Build-Depends-Package: libclamav-dev - CLAMAV_PRIVATE@CLAMAV_PRIVATE 1.0.1 + CLAMAV_PRIVATE@CLAMAV_PRIVATE 1.0.2 CLAMAV_PUBLIC@CLAMAV_PUBLIC 1.0.0 - __cli_strcasestr@CLAMAV_PRIVATE 1.0.1 - __cli_strndup@CLAMAV_PRIVATE 1.0.1 - __cli_strnlen@CLAMAV_PRIVATE 1.0.1 - __cli_strnstr@CLAMAV_PRIVATE 1.0.1 - base64Flush@CLAMAV_PRIVATE 1.0.1 - blobAddData@CLAMAV_PRIVATE 1.0.1 - blobCreate@CLAMAV_PRIVATE 1.0.1 - blobDestroy@CLAMAV_PRIVATE 1.0.1 - cl_ASN1_GetTimeT@CLAMAV_PRIVATE 1.0.1 + __cli_strcasestr@CLAMAV_PRIVATE 1.0.2 + __cli_strndup@CLAMAV_PRIVATE 1.0.2 + __cli_strnlen@CLAMAV_PRIVATE 1.0.2 + __cli_strnstr@CLAMAV_PRIVATE 1.0.2 + base64Flush@CLAMAV_PRIVATE 1.0.2 + blobAddData@CLAMAV_PRIVATE 1.0.2 + blobCreate@CLAMAV_PRIVATE 1.0.2 + blobDestroy@CLAMAV_PRIVATE 1.0.2 + cl_ASN1_GetTimeT@CLAMAV_PRIVATE 1.0.2 cl_always_gen_section_hash@CLAMAV_PUBLIC 1.0.0 - cl_base64_decode@CLAMAV_PRIVATE 1.0.1 - cl_base64_encode@CLAMAV_PRIVATE 1.0.1 - cl_cleanup_crypto@CLAMAV_PRIVATE 1.0.1 + cl_base64_decode@CLAMAV_PRIVATE 1.0.2 + 
cl_base64_encode@CLAMAV_PRIVATE 1.0.2 + cl_cleanup_crypto@CLAMAV_PRIVATE 1.0.2 cl_countsigs@CLAMAV_PUBLIC 1.0.0 cl_cvdfree@CLAMAV_PUBLIC 1.0.0 cl_cvdhead@CLAMAV_PUBLIC 1.0.0 cl_cvdparse@CLAMAV_PUBLIC 1.0.0 - cl_cvdunpack@CLAMAV_PRIVATE 1.0.1 + cl_cvdunpack@CLAMAV_PRIVATE 1.0.2 cl_cvdverify@CLAMAV_PUBLIC 1.0.0 cl_debug@CLAMAV_PUBLIC 1.0.0 cl_engine_addref@CLAMAV_PUBLIC 1.0.0 @@ -28,7 +28,7 @@ cl_engine_get_num@CLAMAV_PUBLIC 1.0.0 cl_engine_get_str@CLAMAV_PUBLIC 1.0.0 cl_engine_new@CLAMAV_PUBLIC 1.0.0 - cl_engine_set_clcb_engine_compile_progress@CLAMAV_PRIVATE 1.0.1 + cl_engine_set_clcb_engine_compile_progress@CLAMAV_PRIVATE 1.0.2 cl_engine_set_clcb_file_inspection@CLAMAV_PUBLIC 1.0.0 cl_engine_set_clcb_file_props@CLAMAV_PUBLIC 1.0.0 cl_engine_set_clcb_hash@CLAMAV_PUBLIC 1.0.0 @@ -37,7 +37,7 @@ cl_engine_set_clcb_pre_cache@CLAMAV_PUBLIC 1.0.0 cl_engine_set_clcb_pre_scan@CLAMAV_PUBLIC 1.0.0 cl_engine_set_clcb_sigload@CLAMAV_PUBLIC 1.0.0 - cl_engine_set_clcb_sigload_progress@CLAMAV_PRIVATE 1.0.1 + cl_engine_set_clcb_sigload_progress@CLAMAV_PRIVATE 1.0.2 cl_engine_set_clcb_stats_add_sample@CLAMAV_PUBLIC 1.0.0 cl_engine_set_clcb_stats_decrement_count@CLAMAV_PUBLIC 1.0.0 cl_engine_set_clcb_stats_flush@CLAMAV_PUBLIC 1.0.0 @@ -58,19 +58,19 @@ cl_fmap_close@CLAMAV_PUBLIC 1.0.0 cl_fmap_open_handle@CLAMAV_PUBLIC 1.0.0 cl_fmap_open_memory@CLAMAV_PUBLIC 1.0.0 - cl_get_pkey_file@CLAMAV_PRIVATE 1.0.1 - cl_get_x509_from_mem@CLAMAV_PRIVATE 1.0.1 - cl_hash_data@CLAMAV_PRIVATE 1.0.1 + cl_get_pkey_file@CLAMAV_PRIVATE 1.0.2 + cl_get_x509_from_mem@CLAMAV_PRIVATE 1.0.2 + cl_hash_data@CLAMAV_PRIVATE 1.0.2 cl_hash_destroy@CLAMAV_PUBLIC 1.0.0 - cl_hash_file_fd@CLAMAV_PRIVATE 1.0.1 - cl_hash_file_fd_ctx@CLAMAV_PRIVATE 1.0.1 - cl_hash_file_fp@CLAMAV_PRIVATE 1.0.1 + cl_hash_file_fd@CLAMAV_PRIVATE 1.0.2 + cl_hash_file_fd_ctx@CLAMAV_PRIVATE 1.0.2 + cl_hash_file_fp@CLAMAV_PRIVATE 1.0.2 cl_hash_init@CLAMAV_PUBLIC 1.0.0 cl_init@CLAMAV_PUBLIC 1.0.0 - cl_initialize_crypto@CLAMAV_PRIVATE 1.0.1 + 
cl_initialize_crypto@CLAMAV_PRIVATE 1.0.2 cl_load@CLAMAV_PUBLIC 1.0.0 - cl_load_cert@CLAMAV_PRIVATE 1.0.1 - cl_load_crl@CLAMAV_PRIVATE 1.0.1 + cl_load_cert@CLAMAV_PRIVATE 1.0.2 + cl_load_crl@CLAMAV_PRIVATE 1.0.2 cl_retdbdir@CLAMAV_PUBLIC 1.0.0 cl_retflevel@CLAMAV_PUBLIC 1.0.1 cl_retver@CLAMAV_PUBLIC 1.0.0 @@ -80,203 +80,203 @@ cl_scanfile_callback@CLAMAV_PUBLIC 1.0.0 cl_scanmap_callback@CLAMAV_PUBLIC 1.0.0 cl_set_clcb_msg@CLAMAV_PUBLIC 1.0.0 - cl_sha1@CLAMAV_PRIVATE 1.0.1 - cl_sha256@CLAMAV_PRIVATE 1.0.1 - cl_sign_data@CLAMAV_PRIVATE 1.0.1 - cl_sign_data_keyfile@CLAMAV_PRIVATE 1.0.1 - cl_sign_file_fd@CLAMAV_PRIVATE 1.0.1 - cl_sign_file_fp@CLAMAV_PRIVATE 1.0.1 + cl_sha1@CLAMAV_PRIVATE 1.0.2 + cl_sha256@CLAMAV_PRIVATE 1.0.2 + cl_sign_data@CLAMAV_PRIVATE 1.0.2 + cl_sign_data_keyfile@CLAMAV_PRIVATE 1.0.2 + cl_sign_file_fd@CLAMAV_PRIVATE 1.0.2 + cl_sign_file_fp@CLAMAV_PRIVATE 1.0.2 cl_statchkdir@CLAMAV_PUBLIC 1.0.0 cl_statfree@CLAMAV_PUBLIC 1.0.0 cl_statinidir@CLAMAV_PUBLIC 1.0.0 cl_strerror@CLAMAV_PUBLIC 1.0.0 cl_update_hash@CLAMAV_PUBLIC 1.0.0 - cl_validate_certificate_chain@CLAMAV_PRIVATE 1.0.1 - cl_validate_certificate_chain_ts_dir@CLAMAV_PRIVATE 1.0.1 - cl_verify_signature@CLAMAV_PRIVATE 1.0.1 - cl_verify_signature_fd@CLAMAV_PRIVATE 1.0.1 - cl_verify_signature_fd_x509@CLAMAV_PRIVATE 1.0.1 - cl_verify_signature_fd_x509_keyfile@CLAMAV_PRIVATE 1.0.1 - cl_verify_signature_hash@CLAMAV_PRIVATE 1.0.1 - cl_verify_signature_hash_x509@CLAMAV_PRIVATE 1.0.1 - cl_verify_signature_hash_x509_keyfile@CLAMAV_PRIVATE 1.0.1 - cl_verify_signature_x509@CLAMAV_PRIVATE 1.0.1 - cl_verify_signature_x509_keyfile@CLAMAV_PRIVATE 1.0.1 - cli_ac_buildtrie@CLAMAV_PRIVATE 1.0.1 - cli_ac_chklsig@CLAMAV_PRIVATE 1.0.1 - cli_ac_free@CLAMAV_PRIVATE 1.0.1 - cli_ac_freedata@CLAMAV_PRIVATE 1.0.1 - cli_ac_init@CLAMAV_PRIVATE 1.0.1 - cli_ac_initdata@CLAMAV_PRIVATE 1.0.1 - cli_ac_scanbuff@CLAMAV_PRIVATE 1.0.1 - cli_add_content_match_pattern@CLAMAV_PRIVATE 1.0.1 - cli_basename@CLAMAV_PRIVATE 1.0.1 - 
cli_bm_free@CLAMAV_PRIVATE 1.0.1 - cli_bm_init@CLAMAV_PRIVATE 1.0.1 - cli_bm_scanbuff@CLAMAV_PRIVATE 1.0.1 - cli_build_regex_list@CLAMAV_PRIVATE 1.0.1 - cli_bytecode_context_alloc@CLAMAV_PRIVATE 1.0.1 - cli_bytecode_context_destroy@CLAMAV_PRIVATE 1.0.1 - cli_bytecode_context_getresult_int@CLAMAV_PRIVATE 1.0.1 - cli_bytecode_context_set_trace@CLAMAV_PRIVATE 1.0.1 - cli_bytecode_context_setfile@CLAMAV_PRIVATE 1.0.1 - cli_bytecode_context_setfuncid@CLAMAV_PRIVATE 1.0.1 - cli_bytecode_context_setparam_int@CLAMAV_PRIVATE 1.0.1 - cli_bytecode_context_setparam_ptr@CLAMAV_PRIVATE 1.0.1 - cli_bytecode_debug@CLAMAV_PRIVATE 1.0.1 - cli_bytecode_debug_printsrc@CLAMAV_PRIVATE 1.0.1 - cli_bytecode_describe@CLAMAV_PRIVATE 1.0.1 - cli_bytecode_destroy@CLAMAV_PRIVATE 1.0.1 - cli_bytecode_done@CLAMAV_PRIVATE 1.0.1 - cli_bytecode_init@CLAMAV_PRIVATE 1.0.1 - cli_bytecode_load@CLAMAV_PRIVATE 1.0.1 - cli_bytecode_prepare2@CLAMAV_PRIVATE 1.0.1 - cli_bytecode_printversion@CLAMAV_PRIVATE 1.0.1 - cli_bytecode_run@CLAMAV_PRIVATE 1.0.1 - cli_bytefunc_describe@CLAMAV_PRIVATE 1.0.1 - cli_byteinst_describe@CLAMAV_PRIVATE 1.0.1 - cli_bytetype_describe@CLAMAV_PRIVATE 1.0.1 - cli_bytevalue_describe@CLAMAV_PRIVATE 1.0.1 - cli_calloc@CLAMAV_PRIVATE 1.0.1 - cli_check_auth_header@CLAMAV_PRIVATE 1.0.1 - cli_chomp@CLAMAV_PRIVATE 1.0.1 - cli_codepage_to_utf8@CLAMAV_PRIVATE 1.0.1 - cli_ctime@CLAMAV_PRIVATE 1.0.1 - cli_dbgmsg@CLAMAV_PRIVATE 1.0.1 - cli_dbgmsg_no_inline@CLAMAV_PRIVATE 1.0.1 - cli_dconf_init@CLAMAV_PRIVATE 1.0.1 - cli_debug_flag@CLAMAV_PRIVATE 1.0.1 - cli_detect_environment@CLAMAV_PRIVATE 1.0.1 - cli_disasm_one@CLAMAV_PRIVATE 1.0.1 - cli_errmsg@CLAMAV_PRIVATE 1.0.1 - cli_filecopy@CLAMAV_PRIVATE 1.0.1 - cli_free_vba_project@CLAMAV_PRIVATE 1.0.1 - cli_ftw@CLAMAV_PRIVATE 1.0.1 - cli_genhash_pe@CLAMAV_PRIVATE 1.0.1 - cli_gentemp@CLAMAV_PRIVATE 1.0.1 - cli_gentemp_with_prefix@CLAMAV_PRIVATE 1.0.1 - cli_gentempfd@CLAMAV_PRIVATE 1.0.1 - cli_gentempfd_with_prefix@CLAMAV_PRIVATE 1.0.1 - 
cli_get_debug_flag@CLAMAV_PRIVATE 1.0.1 - cli_get_filepath_from_filedesc@CLAMAV_PRIVATE 1.0.1 - cli_get_last_virus_str@CLAMAV_PRIVATE 1.0.1 - cli_getdsig@CLAMAV_PRIVATE 1.0.1 - cli_gettmpdir@CLAMAV_PRIVATE 1.0.1 - cli_hashfile@CLAMAV_PRIVATE 1.0.1 - cli_hashset_destroy@CLAMAV_PRIVATE 1.0.1 - cli_hashstream@CLAMAV_PRIVATE 1.0.1 - cli_hex2str@CLAMAV_PRIVATE 1.0.1 - cli_hex2ui@CLAMAV_PRIVATE 1.0.1 - cli_infomsg_simple@CLAMAV_PRIVATE 1.0.1 - cli_initroots@CLAMAV_PRIVATE 1.0.1 - cli_isnumber@CLAMAV_PRIVATE 1.0.1 - cli_js_destroy@CLAMAV_PRIVATE 1.0.1 - cli_js_init@CLAMAV_PRIVATE 1.0.1 - cli_js_output@CLAMAV_PRIVATE 1.0.1 - cli_js_parse_done@CLAMAV_PRIVATE 1.0.1 - cli_js_process_buffer@CLAMAV_PRIVATE 1.0.1 - cli_ldbtokenize@CLAMAV_PRIVATE 1.0.1 - cli_malloc@CLAMAV_PRIVATE 1.0.1 - cli_memstr@CLAMAV_PRIVATE 1.0.1 - cli_ole2_extract@CLAMAV_PRIVATE 1.0.1 - cli_pcre_build@CLAMAV_PRIVATE 1.0.1 - cli_pcre_freeoff@CLAMAV_PRIVATE 1.0.1 - cli_pcre_init@CLAMAV_PRIVATE 1.0.1 - cli_pcre_perf_events_destroy@CLAMAV_PRIVATE 1.0.1 - cli_pcre_perf_print@CLAMAV_PRIVATE 1.0.1 - cli_pcre_recaloff@CLAMAV_PRIVATE 1.0.1 - cli_pcre_scanbuf@CLAMAV_PRIVATE 1.0.1 - cli_ppt_vba_read@CLAMAV_PRIVATE 1.0.1 - cli_printcxxver@CLAMAV_PRIVATE 1.0.1 - cli_readn@CLAMAV_PRIVATE 1.0.1 - cli_realloc@CLAMAV_PRIVATE 1.0.1 - cli_realpath@CLAMAV_PRIVATE 1.0.1 - cli_regcomp@CLAMAV_PRIVATE 1.0.1 - cli_regex2suffix@CLAMAV_PRIVATE 1.0.1 - cli_regexec@CLAMAV_PRIVATE 1.0.1 - cli_regfree@CLAMAV_PRIVATE 1.0.1 - cli_rmdirs@CLAMAV_PRIVATE 1.0.1 - cli_rndnum@CLAMAV_PRIVATE 1.0.1 - cli_sanitize_filepath@CLAMAV_PRIVATE 1.0.1 - cli_scan_buff@CLAMAV_PRIVATE 1.0.1 - cli_scan_fmap@CLAMAV_PRIVATE 1.0.1 - cli_set_debug_flag@CLAMAV_PRIVATE 1.0.1 - cli_sigopts_handler@CLAMAV_PRIVATE 1.0.1 - cli_sigperf_events_destroy@CLAMAV_PRIVATE 1.0.1 - cli_sigperf_print@CLAMAV_PRIVATE 1.0.1 - cli_str2hex@CLAMAV_PRIVATE 1.0.1 - cli_strbcasestr@CLAMAV_PRIVATE 1.0.1 - cli_strdup@CLAMAV_PRIVATE 1.0.1 - cli_strerror@CLAMAV_PRIVATE 1.0.1 - 
cli_strlcat@CLAMAV_PRIVATE 1.0.1 - cli_strlcpy@CLAMAV_PRIVATE 1.0.1 - cli_strntoul@CLAMAV_PRIVATE 1.0.1 - cli_strrcpy@CLAMAV_PRIVATE 1.0.1 - cli_strtok@CLAMAV_PRIVATE 1.0.1 - cli_strtokbuf@CLAMAV_PRIVATE 1.0.1 - cli_strtokenize@CLAMAV_PRIVATE 1.0.1 - cli_textbuffer_append_normalize@CLAMAV_PRIVATE 1.0.1 - cli_unescape@CLAMAV_PRIVATE 1.0.1 - cli_unlink@CLAMAV_PRIVATE 1.0.1 - cli_url_canon@CLAMAV_PRIVATE 1.0.1 - cli_utf16_to_utf8@CLAMAV_PRIVATE 1.0.1 - cli_utf16toascii@CLAMAV_PRIVATE 1.0.1 - cli_vba_inflate@CLAMAV_PRIVATE 1.0.1 - cli_vba_readdir@CLAMAV_PRIVATE 1.0.1 - cli_versig2@CLAMAV_PRIVATE 1.0.1 - cli_versig@CLAMAV_PRIVATE 1.0.1 - cli_warnmsg@CLAMAV_PRIVATE 1.0.1 - cli_wm_decrypt_macro@CLAMAV_PRIVATE 1.0.1 - cli_wm_readdir@CLAMAV_PRIVATE 1.0.1 - cli_writen@CLAMAV_PRIVATE 1.0.1 - decodeLine@CLAMAV_PRIVATE 1.0.1 - disasmbuf@CLAMAV_PRIVATE 1.0.1 - fmap@CLAMAV_PRIVATE 1.0.1 - fmap_dump_to_file@CLAMAV_PRIVATE 1.0.1 - fmap_duplicate@CLAMAV_PRIVATE 1.0.1 - free_duplicate_fmap@CLAMAV_PRIVATE 1.0.1 - get_fpu_endian@CLAMAV_PRIVATE 1.0.1 - have_clamjit@CLAMAV_PRIVATE 1.0.1 - have_rar@CLAMAV_PRIVATE 1.0.1 - html_normalise_map@CLAMAV_PRIVATE 1.0.1 - html_normalise_mem@CLAMAV_PRIVATE 1.0.1 - html_screnc_decode@CLAMAV_PRIVATE 1.0.1 - html_tag_arg_free@CLAMAV_PRIVATE 1.0.1 - init_allow_list@CLAMAV_PRIVATE 1.0.1 - init_domain_list@CLAMAV_PRIVATE 1.0.1 - init_regex_list@CLAMAV_PRIVATE 1.0.1 - is_regex_ok@CLAMAV_PRIVATE 1.0.1 - load_regex_matcher@CLAMAV_PRIVATE 1.0.1 - lsig_increment_subsig_match@CLAMAV_PRIVATE 1.0.1 + cl_validate_certificate_chain@CLAMAV_PRIVATE 1.0.2 + cl_validate_certificate_chain_ts_dir@CLAMAV_PRIVATE 1.0.2 + cl_verify_signature@CLAMAV_PRIVATE 1.0.2 + cl_verify_signature_fd@CLAMAV_PRIVATE 1.0.2 + cl_verify_signature_fd_x509@CLAMAV_PRIVATE 1.0.2 + cl_verify_signature_fd_x509_keyfile@CLAMAV_PRIVATE 1.0.2 + cl_verify_signature_hash@CLAMAV_PRIVATE 1.0.2 + cl_verify_signature_hash_x509@CLAMAV_PRIVATE 1.0.2 + cl_verify_signature_hash_x509_keyfile@CLAMAV_PRIVATE 1.0.2 
+ cl_verify_signature_x509@CLAMAV_PRIVATE 1.0.2 + cl_verify_signature_x509_keyfile@CLAMAV_PRIVATE 1.0.2 + cli_ac_buildtrie@CLAMAV_PRIVATE 1.0.2 + cli_ac_chklsig@CLAMAV_PRIVATE 1.0.2 + cli_ac_free@CLAMAV_PRIVATE 1.0.2 + cli_ac_freedata@CLAMAV_PRIVATE 1.0.2 + cli_ac_init@CLAMAV_PRIVATE 1.0.2 + cli_ac_initdata@CLAMAV_PRIVATE 1.0.2 + cli_ac_scanbuff@CLAMAV_PRIVATE 1.0.2 + cli_add_content_match_pattern@CLAMAV_PRIVATE 1.0.2 + cli_basename@CLAMAV_PRIVATE 1.0.2 + cli_bm_free@CLAMAV_PRIVATE 1.0.2 + cli_bm_init@CLAMAV_PRIVATE 1.0.2 + cli_bm_scanbuff@CLAMAV_PRIVATE 1.0.2 + cli_build_regex_list@CLAMAV_PRIVATE 1.0.2 + cli_bytecode_context_alloc@CLAMAV_PRIVATE 1.0.2 + cli_bytecode_context_destroy@CLAMAV_PRIVATE 1.0.2 + cli_bytecode_context_getresult_int@CLAMAV_PRIVATE 1.0.2 + cli_bytecode_context_set_trace@CLAMAV_PRIVATE 1.0.2 + cli_bytecode_context_setfile@CLAMAV_PRIVATE 1.0.2 + cli_bytecode_context_setfuncid@CLAMAV_PRIVATE 1.0.2 + cli_bytecode_context_setparam_int@CLAMAV_PRIVATE 1.0.2 + cli_bytecode_context_setparam_ptr@CLAMAV_PRIVATE 1.0.2 + cli_bytecode_debug@CLAMAV_PRIVATE 1.0.2 + cli_bytecode_debug_printsrc@CLAMAV_PRIVATE 1.0.2 + cli_bytecode_describe@CLAMAV_PRIVATE 1.0.2 + cli_bytecode_destroy@CLAMAV_PRIVATE 1.0.2 + cli_bytecode_done@CLAMAV_PRIVATE 1.0.2 + cli_bytecode_init@CLAMAV_PRIVATE 1.0.2 + cli_bytecode_load@CLAMAV_PRIVATE 1.0.2 + cli_bytecode_prepare2@CLAMAV_PRIVATE 1.0.2 + cli_bytecode_printversion@CLAMAV_PRIVATE 1.0.2 + cli_bytecode_run@CLAMAV_PRIVATE 1.0.2 + cli_bytefunc_describe@CLAMAV_PRIVATE 1.0.2 + cli_byteinst_describe@CLAMAV_PRIVATE 1.0.2 + cli_bytetype_describe@CLAMAV_PRIVATE 1.0.2 + cli_bytevalue_describe@CLAMAV_PRIVATE 1.0.2 + cli_calloc@CLAMAV_PRIVATE 1.0.2 + cli_check_auth_header@CLAMAV_PRIVATE 1.0.2 + cli_chomp@CLAMAV_PRIVATE 1.0.2 + cli_codepage_to_utf8@CLAMAV_PRIVATE 1.0.2 + cli_ctime@CLAMAV_PRIVATE 1.0.2 + cli_dbgmsg@CLAMAV_PRIVATE 1.0.2 + cli_dbgmsg_no_inline@CLAMAV_PRIVATE 1.0.2 + cli_dconf_init@CLAMAV_PRIVATE 1.0.2 + 
cli_debug_flag@CLAMAV_PRIVATE 1.0.2 + cli_detect_environment@CLAMAV_PRIVATE 1.0.2 + cli_disasm_one@CLAMAV_PRIVATE 1.0.2 + cli_errmsg@CLAMAV_PRIVATE 1.0.2 + cli_filecopy@CLAMAV_PRIVATE 1.0.2 + cli_free_vba_project@CLAMAV_PRIVATE 1.0.2 + cli_ftw@CLAMAV_PRIVATE 1.0.2 + cli_genhash_pe@CLAMAV_PRIVATE 1.0.2 + cli_gentemp@CLAMAV_PRIVATE 1.0.2 + cli_gentemp_with_prefix@CLAMAV_PRIVATE 1.0.2 + cli_gentempfd@CLAMAV_PRIVATE 1.0.2 + cli_gentempfd_with_prefix@CLAMAV_PRIVATE 1.0.2 + cli_get_debug_flag@CLAMAV_PRIVATE 1.0.2 + cli_get_filepath_from_filedesc@CLAMAV_PRIVATE 1.0.2 + cli_get_last_virus_str@CLAMAV_PRIVATE 1.0.2 + cli_getdsig@CLAMAV_PRIVATE 1.0.2 + cli_gettmpdir@CLAMAV_PRIVATE 1.0.2 + cli_hashfile@CLAMAV_PRIVATE 1.0.2 + cli_hashset_destroy@CLAMAV_PRIVATE 1.0.2 + cli_hashstream@CLAMAV_PRIVATE 1.0.2 + cli_hex2str@CLAMAV_PRIVATE 1.0.2 + cli_hex2ui@CLAMAV_PRIVATE 1.0.2 + cli_infomsg_simple@CLAMAV_PRIVATE 1.0.2 + cli_initroots@CLAMAV_PRIVATE 1.0.2 + cli_isnumber@CLAMAV_PRIVATE 1.0.2 + cli_js_destroy@CLAMAV_PRIVATE 1.0.2 + cli_js_init@CLAMAV_PRIVATE 1.0.2 + cli_js_output@CLAMAV_PRIVATE 1.0.2 + cli_js_parse_done@CLAMAV_PRIVATE 1.0.2 + cli_js_process_buffer@CLAMAV_PRIVATE 1.0.2 + cli_ldbtokenize@CLAMAV_PRIVATE 1.0.2 + cli_malloc@CLAMAV_PRIVATE 1.0.2 + cli_memstr@CLAMAV_PRIVATE 1.0.2 + cli_ole2_extract@CLAMAV_PRIVATE 1.0.2 + cli_pcre_build@CLAMAV_PRIVATE 1.0.2 + cli_pcre_freeoff@CLAMAV_PRIVATE 1.0.2 + cli_pcre_init@CLAMAV_PRIVATE 1.0.2 + cli_pcre_perf_events_destroy@CLAMAV_PRIVATE 1.0.2 + cli_pcre_perf_print@CLAMAV_PRIVATE 1.0.2 + cli_pcre_recaloff@CLAMAV_PRIVATE 1.0.2 + cli_pcre_scanbuf@CLAMAV_PRIVATE 1.0.2 + cli_ppt_vba_read@CLAMAV_PRIVATE 1.0.2 + cli_printcxxver@CLAMAV_PRIVATE 1.0.2 + cli_readn@CLAMAV_PRIVATE 1.0.2 + cli_realloc@CLAMAV_PRIVATE 1.0.2 + cli_realpath@CLAMAV_PRIVATE 1.0.2 + cli_regcomp@CLAMAV_PRIVATE 1.0.2 + cli_regex2suffix@CLAMAV_PRIVATE 1.0.2 + cli_regexec@CLAMAV_PRIVATE 1.0.2 + cli_regfree@CLAMAV_PRIVATE 1.0.2 + cli_rmdirs@CLAMAV_PRIVATE 1.0.2 + 
cli_rndnum@CLAMAV_PRIVATE 1.0.2 + cli_sanitize_filepath@CLAMAV_PRIVATE 1.0.2 + cli_scan_buff@CLAMAV_PRIVATE 1.0.2 + cli_scan_fmap@CLAMAV_PRIVATE 1.0.2 + cli_set_debug_flag@CLAMAV_PRIVATE 1.0.2 + cli_sigopts_handler@CLAMAV_PRIVATE 1.0.2 + cli_sigperf_events_destroy@CLAMAV_PRIVATE 1.0.2 + cli_sigperf_print@CLAMAV_PRIVATE 1.0.2 + cli_str2hex@CLAMAV_PRIVATE 1.0.2 + cli_strbcasestr@CLAMAV_PRIVATE 1.0.2 + cli_strdup@CLAMAV_PRIVATE 1.0.2 + cli_strerror@CLAMAV_PRIVATE 1.0.2 + cli_strlcat@CLAMAV_PRIVATE 1.0.2 + cli_strlcpy@CLAMAV_PRIVATE 1.0.2 + cli_strntoul@CLAMAV_PRIVATE 1.0.2 + cli_strrcpy@CLAMAV_PRIVATE 1.0.2 + cli_strtok@CLAMAV_PRIVATE 1.0.2 + cli_strtokbuf@CLAMAV_PRIVATE 1.0.2 + cli_strtokenize@CLAMAV_PRIVATE 1.0.2 + cli_textbuffer_append_normalize@CLAMAV_PRIVATE 1.0.2 + cli_unescape@CLAMAV_PRIVATE 1.0.2 + cli_unlink@CLAMAV_PRIVATE 1.0.2 + cli_url_canon@CLAMAV_PRIVATE 1.0.2 + cli_utf16_to_utf8@CLAMAV_PRIVATE 1.0.2 + cli_utf16toascii@CLAMAV_PRIVATE 1.0.2 + cli_vba_inflate@CLAMAV_PRIVATE 1.0.2 + cli_vba_readdir@CLAMAV_PRIVATE 1.0.2 + cli_versig2@CLAMAV_PRIVATE 1.0.2 + cli_versig@CLAMAV_PRIVATE 1.0.2 + cli_warnmsg@CLAMAV_PRIVATE 1.0.2 + cli_wm_decrypt_macro@CLAMAV_PRIVATE 1.0.2 + cli_wm_readdir@CLAMAV_PRIVATE 1.0.2 + cli_writen@CLAMAV_PRIVATE 1.0.2 + decodeLine@CLAMAV_PRIVATE 1.0.2 + disasmbuf@CLAMAV_PRIVATE 1.0.2 + fmap@CLAMAV_PRIVATE 1.0.2 + fmap_dump_to_file@CLAMAV_PRIVATE 1.0.2 + fmap_duplicate@CLAMAV_PRIVATE 1.0.2 + free_duplicate_fmap@CLAMAV_PRIVATE 1.0.2 + get_fpu_endian@CLAMAV_PRIVATE 1.0.2 + have_clamjit@CLAMAV_PRIVATE 1.0.2 + have_rar@CLAMAV_PRIVATE 1.0.2 + html_normalise_map@CLAMAV_PRIVATE 1.0.2 + html_normalise_mem@CLAMAV_PRIVATE 1.0.2 + html_screnc_decode@CLAMAV_PRIVATE 1.0.2 + html_tag_arg_free@CLAMAV_PRIVATE 1.0.2 + init_allow_list@CLAMAV_PRIVATE 1.0.2 + init_domain_list@CLAMAV_PRIVATE 1.0.2 + init_regex_list@CLAMAV_PRIVATE 1.0.2 + is_regex_ok@CLAMAV_PRIVATE 1.0.2 + load_regex_matcher@CLAMAV_PRIVATE 1.0.2 + lsig_increment_subsig_match@CLAMAV_PRIVATE 1.0.2 
lsig_sub_matched@CLAMAV_PUBLIC 1.0.0 - messageCreate@CLAMAV_PRIVATE 1.0.1 - messageDestroy@CLAMAV_PRIVATE 1.0.1 - mpool_calloc@CLAMAV_PRIVATE 1.0.1 - mpool_create@CLAMAV_PRIVATE 1.0.1 - mpool_destroy@CLAMAV_PRIVATE 1.0.1 - mpool_free@CLAMAV_PRIVATE 1.0.1 - mpool_getstats@CLAMAV_PRIVATE 1.0.1 - phishingScan@CLAMAV_PRIVATE 1.0.1 - phishing_done@CLAMAV_PRIVATE 1.0.1 - phishing_init@CLAMAV_PRIVATE 1.0.1 - readdb_parse_ldb_subsignature@CLAMAV_PRIVATE 1.0.1 - regex_list_add_pattern@CLAMAV_PRIVATE 1.0.1 - regex_list_done@CLAMAV_PRIVATE 1.0.1 - regex_list_match@CLAMAV_PRIVATE 1.0.1 - tableCreate@CLAMAV_PRIVATE 1.0.1 - tableDestroy@CLAMAV_PRIVATE 1.0.1 - tableFind@CLAMAV_PRIVATE 1.0.1 - tableInsert@CLAMAV_PRIVATE 1.0.1 - tableIterate@CLAMAV_PRIVATE 1.0.1 - tableRemove@CLAMAV_PRIVATE 1.0.1 - tableUpdate@CLAMAV_PRIVATE 1.0.1 - text_normalize_init@CLAMAV_PRIVATE 1.0.1 - text_normalize_map@CLAMAV_PRIVATE 1.0.1 - text_normalize_reset@CLAMAV_PRIVATE 1.0.1 - uniq_add@CLAMAV_PRIVATE 1.0.1 - uniq_free@CLAMAV_PRIVATE 1.0.1 - uniq_get@CLAMAV_PRIVATE 1.0.1 - uniq_init@CLAMAV_PRIVATE 1.0.1 + messageCreate@CLAMAV_PRIVATE 1.0.2 + messageDestroy@CLAMAV_PRIVATE 1.0.2 + mpool_calloc@CLAMAV_PRIVATE 1.0.2 + mpool_create@CLAMAV_PRIVATE 1.0.2 + mpool_destroy@CLAMAV_PRIVATE 1.0.2 + mpool_free@CLAMAV_PRIVATE 1.0.2 + mpool_getstats@CLAMAV_PRIVATE 1.0.2 + phishingScan@CLAMAV_PRIVATE 1.0.2 + phishing_done@CLAMAV_PRIVATE 1.0.2 + phishing_init@CLAMAV_PRIVATE 1.0.2 + readdb_parse_ldb_subsignature@CLAMAV_PRIVATE 1.0.2 + regex_list_add_pattern@CLAMAV_PRIVATE 1.0.2 + regex_list_done@CLAMAV_PRIVATE 1.0.2 + regex_list_match@CLAMAV_PRIVATE 1.0.2 + tableCreate@CLAMAV_PRIVATE 1.0.2 + tableDestroy@CLAMAV_PRIVATE 1.0.2 + tableFind@CLAMAV_PRIVATE 1.0.2 + tableInsert@CLAMAV_PRIVATE 1.0.2 + tableIterate@CLAMAV_PRIVATE 1.0.2 + tableRemove@CLAMAV_PRIVATE 1.0.2 + tableUpdate@CLAMAV_PRIVATE 1.0.2 + text_normalize_init@CLAMAV_PRIVATE 1.0.2 + text_normalize_map@CLAMAV_PRIVATE 1.0.2 + text_normalize_reset@CLAMAV_PRIVATE 
1.0.2 + uniq_add@CLAMAV_PRIVATE 1.0.2 + uniq_free@CLAMAV_PRIVATE 1.0.2 + uniq_get@CLAMAV_PRIVATE 1.0.2 + uniq_init@CLAMAV_PRIVATE 1.0.2 libfreshclam.so.2 libclamav11 #MINVER# FRESHCLAM_PRIVATE@FRESHCLAM_PRIVATE 1.0.1 FRESHCLAM_PUBLIC@FRESHCLAM_PUBLIC 1.0.0 diff -Nru clamav-1.0.1+dfsg/debian/patches/Add-a-version-script-for-libclamav-and-libfreshclam.patch clamav-1.0.2+dfsg/debian/patches/Add-a-version-script-for-libclamav-and-libfreshclam.patch --- clamav-1.0.1+dfsg/debian/patches/Add-a-version-script-for-libclamav-and-libfreshclam.patch 2023-02-26 16:35:36.000000000 +0000 +++ clamav-1.0.2+dfsg/debian/patches/Add-a-version-script-for-libclamav-and-libfreshclam.patch 2023-08-27 09:35:11.000000000 +0000 @@ -1,4 +1,4 @@ -From 6970538d0bd11d97d414a6744a670dbe6d8b8909 Mon Sep 17 00:00:00 2001 +From badbc203114a85e8bdec2deecc25f899331c05f4 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 2 Jan 2023 15:51:42 +0100 Subject: Add a version script for libclamav and libfreshclam @@ -20,10 +20,10 @@ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/libclamav/CMakeLists.txt b/libclamav/CMakeLists.txt -index 0d35e3a..61346b9 100644 +index 6bc426f..f0b3fdf 100644 --- a/libclamav/CMakeLists.txt +++ b/libclamav/CMakeLists.txt -@@ -508,7 +508,8 @@ if(ENABLE_SHARED_LIB) +@@ -505,7 +505,8 @@ if(ENABLE_SHARED_LIB) add_library( clamav SHARED ) set_target_properties( clamav PROPERTIES VERSION ${LIBCLAMAV_VERSION} @@ -33,7 +33,7 @@ target_sources( clamav PRIVATE ${LIBCLAMAV_SOURCES} -@@ -554,6 +555,8 @@ if(ENABLE_SHARED_LIB) +@@ -551,6 +552,8 @@ if(ENABLE_SHARED_LIB) Iconv::Iconv ${CMAKE_DL_LIBS} m ) diff -Nru clamav-1.0.1+dfsg/debian/patches/Add-an-option-to-avoid-setting-RPATH-on-unix-systems.patch clamav-1.0.2+dfsg/debian/patches/Add-an-option-to-avoid-setting-RPATH-on-unix-systems.patch --- clamav-1.0.1+dfsg/debian/patches/Add-an-option-to-avoid-setting-RPATH-on-unix-systems.patch 2023-02-26 16:35:36.000000000 +0000 +++ 
clamav-1.0.2+dfsg/debian/patches/Add-an-option-to-avoid-setting-RPATH-on-unix-systems.patch 2023-08-27 09:35:11.000000000 +0000 @@ -1,4 +1,4 @@ -From b1b7438ad627cb935c5e7b9923342bc2e26d4137 Mon Sep 17 00:00:00 2001 +From 9824d59fcb634e349490a49997a294ab6a9f7020 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 6 Jan 2023 23:11:00 +0100 Subject: Add an option to avoid setting RPATH on unix systems. @@ -16,7 +16,7 @@ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt -index a146bc1..ca8fd0c 100644 +index 38a69f3..6bf9620 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -174,13 +174,13 @@ endif() @@ -36,10 +36,10 @@ set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_FULL_LIBDIR}") else() diff --git a/CMakeOptions.cmake b/CMakeOptions.cmake -index 2b377cd..aac5854 100644 +index d995bac..9275352 100644 --- a/CMakeOptions.cmake +++ b/CMakeOptions.cmake -@@ -123,3 +123,6 @@ option(ENABLE_EXTERNAL_TOMFASTMATH +@@ -120,3 +120,6 @@ option(ENABLE_SYSTEMD # Rust Targets: https://doc.rust-lang.org/nightly/rustc/platform-support.html option(RUST_COMPILER_TARGET "Use a custom target triple to build the Rust components. 
Needed for cross-compiling.") diff -Nru clamav-1.0.1+dfsg/debian/patches/Change-paths-in-sample-conf-file-to-match-Debian.patch clamav-1.0.2+dfsg/debian/patches/Change-paths-in-sample-conf-file-to-match-Debian.patch --- clamav-1.0.1+dfsg/debian/patches/Change-paths-in-sample-conf-file-to-match-Debian.patch 2023-02-26 16:35:36.000000000 +0000 +++ clamav-1.0.2+dfsg/debian/patches/Change-paths-in-sample-conf-file-to-match-Debian.patch 2023-08-27 09:35:11.000000000 +0000 @@ -1,4 +1,4 @@ -From feedfa082d0b4107768ecb8fbede3e9488291924 Mon Sep 17 00:00:00 2001 +From 3ba303b9d6e78d5b2b7f28b71efda5e0812dba14 Mon Sep 17 00:00:00 2001 From: Scott Kitterman Date: Mon, 10 Mar 2014 19:20:18 -0400 Subject: Change paths in sample conf file to match Debian diff -Nru clamav-1.0.1+dfsg/debian/patches/Remove-bundled-tomfastmath-library.patch clamav-1.0.2+dfsg/debian/patches/Remove-bundled-tomfastmath-library.patch --- clamav-1.0.1+dfsg/debian/patches/Remove-bundled-tomfastmath-library.patch 1970-01-01 00:00:00.000000000 +0000 +++ clamav-1.0.2+dfsg/debian/patches/Remove-bundled-tomfastmath-library.patch 2023-08-27 09:35:11.000000000 +0000 @@ -0,0 +1,243 @@ +From de9cef7ab6e5a57247f9598340a0e64869429870 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior +Date: Sat, 18 Feb 2023 10:45:50 +0100 +Subject: Remove bundled tomfastmath library. + +Now that the tomfastmath library is no longer used, remove it from the +tree. 
+ +Patch-Name: Remove-bundled-tomfastmath-library.patch +Signed-off-by: Sebastian Andrzej Siewior +--- + .github/workflows/clang-format.yml | 2 +- + README.md | 1 - + clamav-config.h.cmake.in | 3 - + libclamav/CMakeLists.txt | 104 ----------------------------- + libclamav/Doxyfile | 13 +--- + unit_tests/CMakeLists.txt | 3 - + 6 files changed, 2 insertions(+), 124 deletions(-) + +diff --git a/.github/workflows/clang-format.yml b/.github/workflows/clang-format.yml +index b3cbab1..8ededc0 100644 +--- a/.github/workflows/clang-format.yml ++++ b/.github/workflows/clang-format.yml +@@ -22,7 +22,7 @@ name: clang-format + matrix: + path: + - check: "libclamav" +- exclude: "(iana_cctld|bytecode_api_|bytecode_hooks|rijndael|yara|inffixed|inflate|queue|tomsfastmath|nsis|7z|regex|c++|generated)" ++ exclude: "(iana_cctld|bytecode_api_|bytecode_hooks|rijndael|yara|inffixed|inflate|queue|nsis|7z|regex|c++|generated)" + - check: "libfreshclam" + exclude: "" + - check: "clamav-milter" +diff --git a/README.md b/README.md +index 548d672..68dec8c 100644 +--- a/README.md ++++ b/README.md +@@ -113,7 +113,6 @@ ClamAV contains a number of components that include code copied in part or in + whole from 3rd party projects and whose code is not owned by Cisco and which + are licensed differently than ClamAV. These include: + +-- tomsfastmath: public domain + - Yara: Apache 2.0 license + - Yara has since switched to the BSD 3-Clause License; + Our source is out-of-date and needs to be updated. +diff --git a/clamav-config.h.cmake.in b/clamav-config.h.cmake.in +index 5de4cbf..b21af87 100644 +--- a/clamav-config.h.cmake.in ++++ b/clamav-config.h.cmake.in +@@ -401,9 +401,6 @@ + /* Define if UNRAR is linked instead of loaded. */ + #cmakedefine UNRAR_LINKED 1 + +-/* Define if UNRAR is linked instead of loaded. 
*/ +-#cmakedefine HAVE_SYSTEM_TOMSFASTMATH 1 +- + /* "Full clamav library version number" */ + #define LIBCLAMAV_FULLVER "@LIBCLAMAV_VERSION@" + +diff --git a/libclamav/CMakeLists.txt b/libclamav/CMakeLists.txt +index 5eb2e95..82f4e0a 100644 +--- a/libclamav/CMakeLists.txt ++++ b/libclamav/CMakeLists.txt +@@ -23,15 +23,6 @@ endif() + + add_definitions(-DTHIS_IS_LIBCLAMAV) + +-# Enable overflow checks in TomsFastMath's fp_exptmod() function. +-add_definitions(-DTFM_CHECK) +- +-# Just enable ASM in in TomsFastMath's on x86-64 where we know it works. +-# on i686 we run out of registers with -fPIC, and on ia64 we miscompile. +-if(NOT CMAKE_COMPILER_IS_GNUCC OR NOT (CMAKE_SIZEOF_VOID_P EQUAL 8)) +- add_definitions(-DTFM_NO_ASM) +-endif() +- + # 3rd party libraries included in libclamav + add_library( regex OBJECT ) + target_sources( regex +@@ -166,99 +157,6 @@ target_link_libraries( yara + PCRE2::pcre2 + JSONC::jsonc ) + +-add_library( tomsfastmath OBJECT ) +-target_sources( tomsfastmath +- PRIVATE +- tomsfastmath/addsub/fp_add.c +- tomsfastmath/addsub/fp_add_d.c +- tomsfastmath/addsub/fp_addmod.c +- tomsfastmath/addsub/fp_cmp.c +- tomsfastmath/addsub/fp_cmp_d.c +- tomsfastmath/addsub/fp_cmp_mag.c +- tomsfastmath/addsub/fp_sub.c +- tomsfastmath/addsub/fp_sub_d.c +- tomsfastmath/addsub/fp_submod.c +- tomsfastmath/addsub/s_fp_add.c +- tomsfastmath/addsub/s_fp_sub.c +- tomsfastmath/bin/fp_radix_size.c +- tomsfastmath/bin/fp_read_radix.c +- tomsfastmath/bin/fp_read_signed_bin.c +- tomsfastmath/bin/fp_read_unsigned_bin.c +- tomsfastmath/bin/fp_reverse.c +- tomsfastmath/bin/fp_s_rmap.c +- tomsfastmath/bin/fp_signed_bin_size.c +- tomsfastmath/bin/fp_to_signed_bin.c +- tomsfastmath/bin/fp_to_unsigned_bin.c +- tomsfastmath/bin/fp_toradix.c +- tomsfastmath/bin/fp_toradix_n.c +- tomsfastmath/bin/fp_unsigned_bin_size.c +- tomsfastmath/bit/fp_cnt_lsb.c +- tomsfastmath/bit/fp_count_bits.c +- tomsfastmath/bit/fp_div_2.c +- tomsfastmath/bit/fp_div_2d.c +- tomsfastmath/bit/fp_lshd.c +- 
tomsfastmath/bit/fp_mod_2d.c +- tomsfastmath/bit/fp_rshd.c +- tomsfastmath/divide/fp_div.c +- tomsfastmath/divide/fp_div_d.c +- tomsfastmath/divide/fp_mod.c +- tomsfastmath/divide/fp_mod_d.c +- tomsfastmath/exptmod/fp_2expt.c +- tomsfastmath/exptmod/fp_exptmod.c +- tomsfastmath/misc/fp_ident.c +- tomsfastmath/misc/fp_set.c +- tomsfastmath/mont/fp_montgomery_calc_normalization.c +- tomsfastmath/mont/fp_montgomery_reduce.c +- tomsfastmath/mont/fp_montgomery_setup.c +- tomsfastmath/mul/fp_mul.c +- tomsfastmath/mul/fp_mul_comba.c +- tomsfastmath/mul/fp_mul_2.c +- tomsfastmath/mul/fp_mul_2d.c +- tomsfastmath/mul/fp_mul_comba_12.c +- tomsfastmath/mul/fp_mul_comba_17.c +- tomsfastmath/mul/fp_mul_comba_20.c +- tomsfastmath/mul/fp_mul_comba_24.c +- tomsfastmath/mul/fp_mul_comba_28.c +- tomsfastmath/mul/fp_mul_comba_3.c +- tomsfastmath/mul/fp_mul_comba_32.c +- tomsfastmath/mul/fp_mul_comba_4.c +- tomsfastmath/mul/fp_mul_comba_48.c +- tomsfastmath/mul/fp_mul_comba_6.c +- tomsfastmath/mul/fp_mul_comba_64.c +- tomsfastmath/mul/fp_mul_comba_7.c +- tomsfastmath/mul/fp_mul_comba_8.c +- tomsfastmath/mul/fp_mul_comba_9.c +- tomsfastmath/mul/fp_mul_comba_small_set.c +- tomsfastmath/mul/fp_mul_d.c +- tomsfastmath/mul/fp_mulmod.c +- tomsfastmath/numtheory/fp_invmod.c +- tomsfastmath/sqr/fp_sqr.c +- tomsfastmath/sqr/fp_sqr_comba_12.c +- tomsfastmath/sqr/fp_sqr_comba_17.c +- tomsfastmath/sqr/fp_sqr_comba_20.c +- tomsfastmath/sqr/fp_sqr_comba_24.c +- tomsfastmath/sqr/fp_sqr_comba_28.c +- tomsfastmath/sqr/fp_sqr_comba_3.c +- tomsfastmath/sqr/fp_sqr_comba_32.c +- tomsfastmath/sqr/fp_sqr_comba_4.c +- tomsfastmath/sqr/fp_sqr_comba_48.c +- tomsfastmath/sqr/fp_sqr_comba_6.c +- tomsfastmath/sqr/fp_sqr_comba_64.c +- tomsfastmath/sqr/fp_sqr_comba_7.c +- tomsfastmath/sqr/fp_sqr_comba_8.c +- tomsfastmath/sqr/fp_sqr_comba_9.c +- tomsfastmath/sqr/fp_sqr_comba_generic.c +- tomsfastmath/sqr/fp_sqr_comba_small_set.c +- tomsfastmath/sqr/fp_sqrmod.c +- ) +-target_include_directories( tomsfastmath +- 
PRIVATE +- ${CMAKE_BINARY_DIR} +- ${CMAKE_CURRENT_SOURCE_DIR}/tomsfastmath/headers +- PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ) +-set_target_properties( tomsfastmath PROPERTIES +- COMPILE_FLAGS "${WARNCFLAGS}" ) +- + # Bytecode Runtime + add_library( bytecode_runtime OBJECT ) + if(LLVM_FOUND) +@@ -525,7 +423,6 @@ if(ENABLE_SHARED_LIB) + regex + lzma_sdk + yara +- tomsfastmath + bytecode_runtime + ${LIBMSPACK} + ClamAV::libclamav_rust +@@ -637,7 +534,6 @@ if(ENABLE_STATIC_LIB) + regex + lzma_sdk + yara +- tomsfastmath + bytecode_runtime + ${LIBMSPACK} + ClamAV::libclamav_rust +diff --git a/libclamav/Doxyfile b/libclamav/Doxyfile +index a83cf22..a2593ea 100644 +--- a/libclamav/Doxyfile ++++ b/libclamav/Doxyfile +@@ -111,15 +111,4 @@ INPUT = . \ + jsparse \ + jsparse/generated \ + nsis \ +- regex \ +- tomsfastmath \ +- tomsfastmath/addsub \ +- tomsfastmath/bin \ +- tomsfastmath/bit \ +- tomsfastmath/divide \ +- tomsfastmath/exptmod \ +- tomsfastmath/misc \ +- tomsfastmath/mont \ +- tomsfastmath/mul \ +- tomsfastmath/numtheory \ +- tomsfastmath/sqr ++ regex +diff --git a/unit_tests/CMakeLists.txt b/unit_tests/CMakeLists.txt +index 567e95e..0b3d565 100644 +--- a/unit_tests/CMakeLists.txt ++++ b/unit_tests/CMakeLists.txt +@@ -49,7 +49,6 @@ if(ENABLE_APP) + PRIVATE + ClamAV::libclamav + libcheck::check +- tomsfastmath + JSONC::jsonc + ${LIBMSPACK} + OpenSSL::SSL +@@ -85,7 +84,6 @@ if(ENABLE_APP) + ClamAV::libclamav + ClamAV::common + libcheck::check +- tomsfastmath + JSONC::jsonc + ${LIBMSPACK} + OpenSSL::SSL +@@ -133,7 +131,6 @@ target_link_libraries(check_clamav + PRIVATE + ClamAV::libclamav + libcheck::check +- tomsfastmath + JSONC::jsonc + ${LIBMSPACK} + OpenSSL::SSL diff -Nru clamav-1.0.1+dfsg/debian/patches/Use-either-system-s-tomfastmath-library-or-the-built.patch clamav-1.0.2+dfsg/debian/patches/Use-either-system-s-tomfastmath-library-or-the-built.patch --- clamav-1.0.1+dfsg/debian/patches/Use-either-system-s-tomfastmath-library-or-the-built.patch 2023-02-26 
16:35:36.000000000 +0000 +++ clamav-1.0.2+dfsg/debian/patches/Use-either-system-s-tomfastmath-library-or-the-built.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,231 +0,0 @@ -From c9cdab0a6a11d86f7f39973b86f9752fc2000f54 Mon Sep 17 00:00:00 2001 -From: Sebastian Andrzej Siewior -Date: Fri, 30 Dec 2022 19:06:28 +0100 -Subject: Use either system's tomfastmath library or the built-in one. - -Patch-Name: Use-either-system-s-tomfastmath-library-or-the-built.patch -Signed-off-by: Sebastian Andrzej Siewior ---- - CMakeLists.txt | 15 +++++++ - CMakeOptions.cmake | 3 ++ - cmake/FindTOMSFASTMATH.cmake | 85 ++++++++++++++++++++++++++++++++++++ - libclamav/CMakeLists.txt | 7 ++- - unit_tests/CMakeLists.txt | 6 +-- - 5 files changed, 111 insertions(+), 5 deletions(-) - create mode 100644 cmake/FindTOMSFASTMATH.cmake - -diff --git a/CMakeLists.txt b/CMakeLists.txt -index edaa6b4..a146bc1 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -460,6 +460,11 @@ if(ZLIB_FOUND) - set(HAVE_LIBZ 1) - endif() - -+find_package(TOMSFASTMATH REQUIRED) -+if(TOMSFASTMATH_FOUND) -+ set(HAVE_TFM_H 1) -+endif() -+ - find_package(BZip2 REQUIRED) - if(BZIP2_FOUND) - set(HAVE_BZLIB_H 1) -@@ -1205,6 +1210,16 @@ ${_} ${e}${OPENSSL_LIBRARIES} - ${b} JSON support: ${e} - ${_} json-c ${e}${JSONC_INCLUDE_DIRS} - ${_} ${e}${JSONC_LIBRARIES} -+${b} Math support: ${e}") -+if(TOMFASTMATH_BUILTIN) -+message("\ -+${_} tomsfastmath ${e}built-in") -+else() -+message("\ -+${_} tomsfastmath ${e}${TOMSFASTMATH_INCLUDE_DIR} -+${_} ${e}${TOMSFASTMATH_LIBRARY}") -+endif() -+message("\ - ${b} Threading support: ${e}") - if(WIN32) - message("\ -diff --git a/CMakeOptions.cmake b/CMakeOptions.cmake -index d995bac..2b377cd 100644 ---- a/CMakeOptions.cmake -+++ b/CMakeOptions.cmake -@@ -116,6 +116,9 @@ option(ENABLE_SYSTEMD - "Install systemd service files if systemd is found." 
- ${ENABLE_SYSTEMD_DEFAULT}) - -+option(ENABLE_EXTERNAL_TOMFASTMATH -+ "Use system's tomfastmath instead of internal bundled version.") -+ - # For reference determining target platform: - # Rust Targets: https://doc.rust-lang.org/nightly/rustc/platform-support.html - option(RUST_COMPILER_TARGET -diff --git a/cmake/FindTOMSFASTMATH.cmake b/cmake/FindTOMSFASTMATH.cmake -new file mode 100644 -index 0000000..abe1b10 ---- /dev/null -+++ b/cmake/FindTOMSFASTMATH.cmake -@@ -0,0 +1,85 @@ -+# Distributed under the OSI-approved BSD 3-Clause License. See accompanying -+# file Copyright.txt or https://cmake.org/licensing for details. -+ -+#[=======================================================================[.rst: -+FindTOMSFASTMATH -+------- -+ -+Finds the TOMSFASTMATH library. -+ -+Imported Targets -+^^^^^^^^^^^^^^^^ -+ -+This module provides the following imported targets, if found: -+ -+``TOMSFASTMATH::tfm`` -+The TOMSFASTMATH library -+ -+Result Variables -+^^^^^^^^^^^^^^^^ -+ -+This will define the following variables: -+ -+``TOMSFASTMATH_FOUND`` -+True if the system has the TOMSFASTMATH library. -+``TOMSFASTMATH_VERSION`` -+The version of the TOMSFASTMATH library which was found. -+``TOMSFASTMATH_INCLUDE_DIRS`` -+Include directories needed to use TOMSFASTMATH. -+``TOMSFASTMATH_LIBRARIES`` -+Libraries needed to link to TOMSFASTMATH. -+ -+Cache Variables -+^^^^^^^^^^^^^^^ -+ -+The following cache variables may also be set: -+ -+``TOMSFASTMATH_INCLUDE_DIR`` -+ The directory containing ``tfm.h``. -+ ``TOMSFASTMATH_LIBRARY`` -+ The path to the TOMSFASTMATH library. 
-+ -+#]=======================================================================] -+ -+if(NOT ENABLE_EXTERNAL_TOMFASTMATH) -+ set(TOMFASTMATH_LIB_NAME "tomsfastmath") -+ set(TOMFASTMATH_BUILTIN 1) -+else() -+ set(TOMFASTMATH_LIB_NAME "tfm") -+ add_definitions(-DHAVE_SYSTEM_TOMSFASTMATH) -+ -+find_package(PkgConfig QUIET) -+pkg_check_modules(PC_TOMSFASTMATH QUIET tomsfastmath) -+ -+find_path(TOMSFASTMATH_INCLUDE_DIR -+ NAMES tfm.h -+ PATHS ${PC_TOMSFASTMATH_INCLUDE_DIRS} -+ PATH_SUFFIXES tfm -+) -+find_library(TOMSFASTMATH_LIBRARY -+ NAMES tfm -+ PATHS ${PC_TOMSFASTMATH_LIBRARY_DIRS} -+) -+ -+set(TOMSFASTMATH_VERSION ${PC_TOMSFASTMATH_VERSION}) -+ -+include(FindPackageHandleStandardArgs) -+find_package_handle_standard_args(TOMSFASTMATH -+ FOUND_VAR TOMSFASTMATH_FOUND -+ REQUIRED_VARS -+ TOMSFASTMATH_LIBRARY -+ TOMSFASTMATH_INCLUDE_DIR -+ VERSION_VAR TOMSFASTMATH_VERSION -+) -+ -+if(TOMSFASTMATH_FOUND) -+ set(TOMSFASTMATH_LIBRARIES ${TOMSFASTMATH_LIBRARY}) -+ set(TOMSFASTMATH_INCLUDE_DIRS ${TOMSFASTMATH_INCLUDE_DIR}) -+ set(TOMSFASTMATH_DEFINITIONS ${PC_TOMSFASTMATH_CFLAGS_OTHER}) -+endif() -+ -+mark_as_advanced( -+ TOMSFASTMATH_INCLUDE_DIR -+ TOMSFASTMATH_LIBRARY -+) -+endif() -diff --git a/libclamav/CMakeLists.txt b/libclamav/CMakeLists.txt -index 6bc426f..0d35e3a 100644 ---- a/libclamav/CMakeLists.txt -+++ b/libclamav/CMakeLists.txt -@@ -3,6 +3,7 @@ - include_directories( - ${LIBXML2_INCLUDE_DIR} - ${OPENSSL_INCLUDE_DIR} -+ ${TOMSFASTMATH_INCLUDE_DIR} - ${ZLIB_INCLUDE_DIR} - ${CMAKE_CURRENT_BINARY_DIR} - $ -@@ -166,6 +167,7 @@ target_link_libraries( yara - PCRE2::pcre2 - JSONC::jsonc ) - -+if(TOMFASTMATH_BUILTIN) - add_library( tomsfastmath OBJECT ) - target_sources( tomsfastmath - PRIVATE -@@ -259,6 +261,7 @@ target_include_directories( tomsfastmath - PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ) - set_target_properties( tomsfastmath PROPERTIES - COMPILE_FLAGS "${WARNCFLAGS}" ) -+endif() - - # Bytecode Runtime - add_library( bytecode_runtime OBJECT ) -@@ -525,7 +528,7 @@ 
if(ENABLE_SHARED_LIB) - regex - lzma_sdk - yara -- tomsfastmath -+ ${TOMFASTMATH_LIB_NAME} - bytecode_runtime - ${LIBMSPACK} - ClamAV::libclamav_rust -@@ -635,7 +638,7 @@ if(ENABLE_STATIC_LIB) - regex - lzma_sdk - yara -- tomsfastmath -+ ${TOMFASTMATH_LIB_NAME} - bytecode_runtime - ${LIBMSPACK} - ClamAV::libclamav_rust -diff --git a/unit_tests/CMakeLists.txt b/unit_tests/CMakeLists.txt -index 567e95e..0122929 100644 ---- a/unit_tests/CMakeLists.txt -+++ b/unit_tests/CMakeLists.txt -@@ -49,7 +49,7 @@ if(ENABLE_APP) - PRIVATE - ClamAV::libclamav - libcheck::check -- tomsfastmath -+ ${TOMFASTMATH_LIB_NAME} - JSONC::jsonc - ${LIBMSPACK} - OpenSSL::SSL -@@ -85,7 +85,7 @@ if(ENABLE_APP) - ClamAV::libclamav - ClamAV::common - libcheck::check -- tomsfastmath -+ ${TOMFASTMATH_LIB_NAME} - JSONC::jsonc - ${LIBMSPACK} - OpenSSL::SSL -@@ -133,7 +133,7 @@ target_link_libraries(check_clamav - PRIVATE - ClamAV::libclamav - libcheck::check -- tomsfastmath -+ ${TOMFASTMATH_LIB_NAME} - JSONC::jsonc - ${LIBMSPACK} - OpenSSL::SSL diff -Nru clamav-1.0.1+dfsg/debian/patches/cargo-Remove-windows-referenfes.patch clamav-1.0.2+dfsg/debian/patches/cargo-Remove-windows-referenfes.patch --- clamav-1.0.1+dfsg/debian/patches/cargo-Remove-windows-referenfes.patch 2023-02-26 16:35:36.000000000 +0000 +++ clamav-1.0.2+dfsg/debian/patches/cargo-Remove-windows-referenfes.patch 2023-08-27 09:35:11.000000000 +0000 @@ -1,4 +1,4 @@ -From 95172b6139277bb52e3970d5d595f12f14fa3efc Mon Sep 17 00:00:00 2001 +From 410b3222b6d7975649468a0470a5dea31476b04e Mon Sep 17 00:00:00 2001 From: Scott Kitterman Date: Sat, 31 Dec 2022 12:12:58 +0100 Subject: cargo: Remove windows referenfes. 
@@ -20,8 +20,8 @@ .../.cargo/vendor/remove_dir_all/Cargo.toml | 4 ++-- .../.cargo/vendor/tempfile/.cargo-checksum.json | 2 +- libclamav_rust/.cargo/vendor/tempfile/Cargo.toml | 6 +++--- - .../.cargo/vendor/termcolor/.cargo-checksum.json | 2 +- - libclamav_rust/.cargo/vendor/termcolor/Cargo.toml | 4 ++-- + .../.cargo/vendor/which/.cargo-checksum.json | 2 +- + libclamav_rust/.cargo/vendor/which/Cargo.toml | 4 ++-- 12 files changed, 25 insertions(+), 25 deletions(-) diff --git a/libclamav_rust/.cargo/vendor/ansi_term/.cargo-checksum.json b/libclamav_rust/.cargo/vendor/ansi_term/.cargo-checksum.json @@ -141,23 +141,23 @@ +#[target."cfg(windows)".dependencies.winapi] +#version = "0.3" +#features = ["fileapi", "handleapi", "winbase"] -diff --git a/libclamav_rust/.cargo/vendor/termcolor/.cargo-checksum.json b/libclamav_rust/.cargo/vendor/termcolor/.cargo-checksum.json -index 6032d23..72c84f9 100644 ---- a/libclamav_rust/.cargo/vendor/termcolor/.cargo-checksum.json -+++ b/libclamav_rust/.cargo/vendor/termcolor/.cargo-checksum.json -@@ -1 +1 @@ --{"files":{"COPYING":"01c266bced4a434da0051174d6bee16a4c82cf634e2679b6155d40d75012390f","Cargo.toml":"2e1ffefd2c70d47b5097d7ecc26184d92e4e2be1174c53147a617729024a4a51","LICENSE-MIT":"0f96a83840e146e43c0ec96a22ec1f392e0680e6c1226e6f3ba87e0740af850f","README.md":"cc4c882bde8d2ef26ef4770ff30d60eda603d87ae32e16d99525dc88f3377238","UNLICENSE":"7e12e5df4bae12cb21581ba157ced20e1986a0508dd10d0e8a4ab9a4cf94e85c","rustfmt.toml":"1ca600239a27401c4a43f363cf3f38183a212affc1f31bff3ae93234bbaec228","src/lib.rs":"fe62bc640112ffb687366fbe4a084ed3bf749185f77d1e401757ab148313fb7e"},"package":"be55cf8942feac5c765c2c993422806843c9a9a45d4d5c407ad6dd2ea95eb9b6"} -\ No newline at end of file 
-+{"files":{"COPYING":"01c266bced4a434da0051174d6bee16a4c82cf634e2679b6155d40d75012390f","LICENSE-MIT":"0f96a83840e146e43c0ec96a22ec1f392e0680e6c1226e6f3ba87e0740af850f","README.md":"cc4c882bde8d2ef26ef4770ff30d60eda603d87ae32e16d99525dc88f3377238","UNLICENSE":"7e12e5df4bae12cb21581ba157ced20e1986a0508dd10d0e8a4ab9a4cf94e85c","rustfmt.toml":"1ca600239a27401c4a43f363cf3f38183a212affc1f31bff3ae93234bbaec228","src/lib.rs":"fe62bc640112ffb687366fbe4a084ed3bf749185f77d1e401757ab148313fb7e"},"package":"be55cf8942feac5c765c2c993422806843c9a9a45d4d5c407ad6dd2ea95eb9b6"} -diff --git a/libclamav_rust/.cargo/vendor/termcolor/Cargo.toml b/libclamav_rust/.cargo/vendor/termcolor/Cargo.toml -index dbdb6e8..6d7abff 100644 ---- a/libclamav_rust/.cargo/vendor/termcolor/Cargo.toml -+++ b/libclamav_rust/.cargo/vendor/termcolor/Cargo.toml -@@ -36,5 +36,5 @@ bench = false +diff --git a/libclamav_rust/.cargo/vendor/which/.cargo-checksum.json b/libclamav_rust/.cargo/vendor/which/.cargo-checksum.json +index 7b3c3ca..69fea07 100644 +--- a/libclamav_rust/.cargo/vendor/which/.cargo-checksum.json ++++ b/libclamav_rust/.cargo/vendor/which/.cargo-checksum.json +@@ -1 +1 @@ +-{"files":{"Cargo.toml":"52b2c35d7270c5db75872052c2f8e56740f3c4ccf48b4a17be7b600c57bf24a0","LICENSE.txt":"0041560f5d419c30e1594567f3b7ac2bc078ff6a68f437e0348ba85d9cf99112","README.md":"8b16d6a129cb05c3b6ed15e5eacbd7ca488a5005f3d22d3376cc75157996f1dc","src/checker.rs":"e17ca8bcccedfba17ba027e86de970a01d6d207ba442174184952966eeaba140","src/error.rs":"00315874353628366851cd0817a60059cb2c784fd315407a2c30f38021b18dc6","src/finder.rs":"71d09b164ebf51e70dc67b6e4db78bc1c10afedc6473b1edb795d36bd3a3c83b","src/helper.rs":"42cf60a98c017fcbf96d8cbf5880398b4f191c4b2445c43028c35ad57a1b846a","src/lib.rs":"53926af5cadb33966a6d7e0bdd87a48470ac703f144da77212edbedf88bb0692","tests/basic.rs":"90e2c26bc1402fea996e91342f0c299cc91fb54e82445b0bb46715a77660059b"},"package":"2441c784c52b289a054b7201fc93253e288f094e2f4be9058343127c4226a269"} +\ No 
newline at end of file ++{"files":{"LICENSE.txt":"0041560f5d419c30e1594567f3b7ac2bc078ff6a68f437e0348ba85d9cf99112","README.md":"8b16d6a129cb05c3b6ed15e5eacbd7ca488a5005f3d22d3376cc75157996f1dc","src/checker.rs":"e17ca8bcccedfba17ba027e86de970a01d6d207ba442174184952966eeaba140","src/error.rs":"00315874353628366851cd0817a60059cb2c784fd315407a2c30f38021b18dc6","src/finder.rs":"71d09b164ebf51e70dc67b6e4db78bc1c10afedc6473b1edb795d36bd3a3c83b","src/helper.rs":"42cf60a98c017fcbf96d8cbf5880398b4f191c4b2445c43028c35ad57a1b846a","src/lib.rs":"53926af5cadb33966a6d7e0bdd87a48470ac703f144da77212edbedf88bb0692","tests/basic.rs":"90e2c26bc1402fea996e91342f0c299cc91fb54e82445b0bb46715a77660059b"},"package":"2441c784c52b289a054b7201fc93253e288f094e2f4be9058343127c4226a269"} +diff --git a/libclamav_rust/.cargo/vendor/which/Cargo.toml b/libclamav_rust/.cargo/vendor/which/Cargo.toml +index ff6894a..6655f3f 100644 +--- a/libclamav_rust/.cargo/vendor/which/Cargo.toml ++++ b/libclamav_rust/.cargo/vendor/which/Cargo.toml +@@ -46,5 +46,5 @@ optional = true + [dev-dependencies.tempfile] + version = "3.3.0" - [dev-dependencies] - --[target."cfg(windows)".dependencies.winapi-util] --version = "0.1.3" -+#[target."cfg(windows)".dependencies.winapi-util] -+#version = "0.1.3" +-[target."cfg(windows)".dependencies.once_cell] +-version = "1" ++#[target."cfg(windows)".dependencies.once_cell] ++#version = "1" diff -Nru clamav-1.0.1+dfsg/debian/patches/clamd_dont_depend_on_clamav_demon_socket.patch clamav-1.0.2+dfsg/debian/patches/clamd_dont_depend_on_clamav_demon_socket.patch --- clamav-1.0.1+dfsg/debian/patches/clamd_dont_depend_on_clamav_demon_socket.patch 2023-02-26 16:35:36.000000000 +0000 +++ clamav-1.0.2+dfsg/debian/patches/clamd_dont_depend_on_clamav_demon_socket.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,35 +0,0 @@ -From 8a47e2c733b7b5ffda3eac7605ded80e935f510c Mon Sep 17 00:00:00 2001 -From: Sebastian Andrzej Siewior -Date: Thu, 11 Aug 2016 21:54:10 +0200 -Subject: clamd: don't depend 
on clamav-demon.socket - -Let's try to live without it. -This should avoid the endless loop in #824042. Newer systemd have -rate-limiting on (re)starts. This rate-limiting would stop the socket -service. The only purpose for the socket activation is to get clamd -started after the initial freshclam run on installs so I think we can -live without and manually start the daemon after installation. - -Patch-Name: clamd_dont_depend_on_clamav_demon_socket.patch -Signed-off-by: Sebastian Andrzej Siewior ---- - clamd/clamav-daemon.service.in | 2 -- - 1 file changed, 2 deletions(-) - -diff --git a/clamd/clamav-daemon.service.in b/clamd/clamav-daemon.service.in -index 579a512..d84a2b0 100644 ---- a/clamd/clamav-daemon.service.in -+++ b/clamd/clamav-daemon.service.in -@@ -1,7 +1,6 @@ - [Unit] - Description=Clam AntiVirus userspace daemon - Documentation=man:clamd(8) man:clamd.conf(5) https://docs.clamav.net/ --Requires=clamav-daemon.socket - # Check for database existence - ConditionPathExistsGlob=@DATADIR@/main.{c[vl]d,inc} - ConditionPathExistsGlob=@DATADIR@/daily.{c[vl]d,inc} -@@ -14,4 +13,3 @@ TimeoutStartSec=420 - - [Install] - WantedBy=multi-user.target --Also=clamav-daemon.socket diff -Nru clamav-1.0.1+dfsg/debian/patches/libclamav-Add-missing-symbols.patch clamav-1.0.2+dfsg/debian/patches/libclamav-Add-missing-symbols.patch --- clamav-1.0.1+dfsg/debian/patches/libclamav-Add-missing-symbols.patch 2023-02-26 16:35:36.000000000 +0000 +++ clamav-1.0.2+dfsg/debian/patches/libclamav-Add-missing-symbols.patch 2023-08-27 09:35:11.000000000 +0000 @@ -1,4 +1,4 @@ -From a9827003989996fdee7295de7af2c2cb976a00aa Mon Sep 17 00:00:00 2001 +From 79c704216edd15b41173f889d468f2e01a5aeb16 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 2 Jan 2023 16:20:39 +0100 Subject: libclamav: Add missing symbols. 
diff -Nru clamav-1.0.1+dfsg/debian/patches/libclamav-Sort-libclamav.map-and-libfreshclam.map.patch clamav-1.0.2+dfsg/debian/patches/libclamav-Sort-libclamav.map-and-libfreshclam.map.patch --- clamav-1.0.1+dfsg/debian/patches/libclamav-Sort-libclamav.map-and-libfreshclam.map.patch 2023-02-26 16:35:36.000000000 +0000 +++ clamav-1.0.2+dfsg/debian/patches/libclamav-Sort-libclamav.map-and-libfreshclam.map.patch 2023-08-27 09:35:11.000000000 +0000 @@ -1,4 +1,4 @@ -From 591c101430987a4e8d1a01e2b2daccf35526438c Mon Sep 17 00:00:00 2001 +From 590e38aa59c54f91e0c1cbe542260c7bfb5d8e50 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 2 Jan 2023 16:13:35 +0100 Subject: libclamav: Sort libclamav.map and libfreshclam.map diff -Nru clamav-1.0.1+dfsg/debian/patches/libclamav-Use-OpenSSL-BN-instead-tomfastmath.patch clamav-1.0.2+dfsg/debian/patches/libclamav-Use-OpenSSL-BN-instead-tomfastmath.patch --- clamav-1.0.1+dfsg/debian/patches/libclamav-Use-OpenSSL-BN-instead-tomfastmath.patch 1970-01-01 00:00:00.000000000 +0000 +++ clamav-1.0.2+dfsg/debian/patches/libclamav-Use-OpenSSL-BN-instead-tomfastmath.patch 2023-08-27 09:35:11.000000000 +0000 @@ -0,0 +1,1132 @@ +From 1ed2932ff2ba201883c4ddeb1cfc954d04f7f57e Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior +Date: Sat, 18 Feb 2023 10:47:53 +0100 +Subject: libclamav: Use OpenSSL' BN instead tomfastmath. + +Use OpenSSL's big number/ multiprecision integer arithmetics +functionality to replace tomfastmath. + +This is a first shot at doing just this. Further improvement could be +use more RSA-signature verification from OpenSSL in crtmgr_rsa_verify() +and less self parsing. +_padding_check_PKCS1_type_1() has been borrowed from OpenSSL to make +further replacments easier. 
+ +Patch-Name: libclamav-Use-OpenSSL-BN-instead-tomfastmath.patch +Signed-off-by: Sebastian Andrzej Siewior +--- + libclamav/CMakeLists.txt | 3 +- + libclamav/asn1.c | 33 +++- + libclamav/bignum.h | 14 -- + libclamav/crtmgr.c | 405 ++++++++++++++++++++++++++------------- + libclamav/crtmgr.h | 14 +- + libclamav/dsig.c | 175 ++++++++++++----- + libclamav/dsig.h | 2 +- + libclamav/readdb.c | 24 ++- + libclamav/textnorm.c | 1 - + libclamav/xdp.c | 1 - + 10 files changed, 443 insertions(+), 229 deletions(-) + delete mode 100644 libclamav/bignum.h + +diff --git a/libclamav/CMakeLists.txt b/libclamav/CMakeLists.txt +index f0b3fdf..5eb2e95 100644 +--- a/libclamav/CMakeLists.txt ++++ b/libclamav/CMakeLists.txt +@@ -250,8 +250,7 @@ target_sources( tomsfastmath + tomsfastmath/sqr/fp_sqr_comba_generic.c + tomsfastmath/sqr/fp_sqr_comba_small_set.c + tomsfastmath/sqr/fp_sqrmod.c +- PUBLIC +- bignum.h ) ++ ) + target_include_directories( tomsfastmath + PRIVATE + ${CMAKE_BINARY_DIR} +diff --git a/libclamav/asn1.c b/libclamav/asn1.c +index 1eec3b0..880dbc5 100644 +--- a/libclamav/asn1.c ++++ b/libclamav/asn1.c +@@ -24,10 +24,10 @@ + #endif + + #include ++#include + + #include "clamav.h" + #include "asn1.h" +-#include "bignum.h" + #include "matcher-hash.h" + + /* --------------------------------------------------------------------------- OIDS */ +@@ -695,7 +695,8 @@ static int asn1_get_rsa_pubkey(fmap_t *map, const void **asn1data, unsigned int + return 1; + } + +- fp_read_unsigned_bin(&x509->n, obj.content, avail2); ++ if (!BN_bin2bn(obj.content, avail2, x509->n)) ++ return 1; + + if (asn1_expect_objtype(map, obj.next, &avail, &obj, ASN1_TYPE_INTEGER)) /* INTEGER - exp */ + return 1; +@@ -712,7 +713,8 @@ static int asn1_get_rsa_pubkey(fmap_t *map, const void **asn1data, unsigned int + return 1; + } + +- fp_read_unsigned_bin(&x509->e, obj.content, obj.size); ++ if (!BN_bin2bn(obj.content, obj.size, x509->e)) ++ return 1; + + return 0; + } +@@ -738,9 +740,12 @@ static int 
asn1_get_x509(fmap_t *map, const void **asn1data, unsigned int *size, + int ret = ASN1_GET_X509_UNRECOVERABLE_ERROR; + unsigned int version; + +- cli_crt_init(&x509); +- + do { ++ if (cli_crt_init(&x509) < 0) { ++ cli_dbgmsg("asn1_get_x509: failed to initialize x509.\n"); ++ break; ++ } ++ + if (asn1_expect_objtype(map, *asn1data, size, &crt, ASN1_TYPE_SEQUENCE)) { /* SEQUENCE */ + cli_dbgmsg("asn1_get_x509: expected SEQUENCE at the x509 start\n"); + break; +@@ -1107,7 +1112,8 @@ static int asn1_get_x509(fmap_t *map, const void **asn1data, unsigned int *size, + break; + } + +- fp_read_unsigned_bin(&x509.sig, obj.content, obj.size); ++ if (!BN_bin2bn(obj.content, obj.size, x509.sig)) ++ break; + + if (crt.size) { + cli_dbgmsg("asn1_get_x509: found unexpected extra data in signature\n"); +@@ -1404,6 +1410,8 @@ static cl_error_t asn1_parse_mscat(struct cl_engine *engine, fmap_t *map, size_t + void *hash_ctx; + int result; + cl_error_t ret = CL_EPARSE; ++ char *mod = NULL; ++ char *exp = NULL; + + cli_dbgmsg("in asn1_parse_mscat\n"); + +@@ -1558,11 +1566,10 @@ static cl_error_t asn1_parse_mscat(struct cl_engine *engine, fmap_t *map, size_t + while (x509) { + char raw_issuer[CRT_RAWMAXLEN * 2 + 1], raw_subject[CRT_RAWMAXLEN * 2 + 1], raw_serial[CRT_RAWMAXLEN * 3 + 1]; + char issuer[SHA1_HASH_SIZE * 2 + 1], subject[SHA1_HASH_SIZE * 2 + 1], serial[SHA1_HASH_SIZE * 2 + 1]; +- char mod[1024 + 1], exp[1024 + 1]; +- int j = 1024; ++ int j; + +- fp_toradix_n(&x509->n, mod, 16, j + 1); +- fp_toradix_n(&x509->e, exp, 16, j + 1); ++ mod = BN_bn2hex(x509->n); ++ exp = BN_bn2hex(x509->e); + memset(raw_issuer, 0, CRT_RAWMAXLEN * 2 + 1); + memset(raw_subject, 0, CRT_RAWMAXLEN * 2 + 1); + memset(raw_serial, 0, CRT_RAWMAXLEN * 2 + 1); +@@ -1594,6 +1601,10 @@ static cl_error_t asn1_parse_mscat(struct cl_engine *engine, fmap_t *map, size_t + cli_dbgmsg(" raw_issuer: %s\n", raw_issuer); + + x509 = x509->next; ++ OPENSSL_free(mod); ++ OPENSSL_free(exp); ++ mod = NULL; ++ exp = NULL; + } + 
x509 = newcerts.crts; + } +@@ -2149,6 +2160,8 @@ static cl_error_t asn1_parse_mscat(struct cl_engine *engine, fmap_t *map, size_t + } while (0); + + finish: ++ OPENSSL_free(mod); ++ OPENSSL_free(exp); + if (CL_EPARSE == ret) { + cli_dbgmsg("asn1_parse_mscat: failed to parse authenticode section\n"); + } +diff --git a/libclamav/bignum.h b/libclamav/bignum.h +deleted file mode 100644 +index a1c6d6e..0000000 +--- a/libclamav/bignum.h ++++ /dev/null +@@ -1,14 +0,0 @@ +-#ifndef BIGNUM_H_ +-#define BIGNUM_H_ +- +-#if HAVE_CONFIG_H +-#include "clamav-config.h" +-#endif +- +-#if HAVE_SYSTEM_TOMSFASTMATH +-#include +-#else +-#include "tomsfastmath/headers/tfm.h" +-#endif +- +-#endif +diff --git a/libclamav/crtmgr.c b/libclamav/crtmgr.c +index 571b1a7..3943e14 100644 +--- a/libclamav/crtmgr.c ++++ b/libclamav/crtmgr.c +@@ -42,20 +42,39 @@ + #define OID_2_16_840_1_101_3_4_2_3 "\x60\x86\x48\x01\x65\x03\x04\x02\x03" + #define OID_sha512 OID_2_16_840_1_101_3_4_2_3 + +-#define FP_INIT_MULTI(a, b, c) (fp_init(a), fp_init(b), fp_init(c)) +-#define FP_CLEAR_MULTI(...) ++static int cli_crt_init_fps(cli_crt *x509) ++{ ++ x509->n = BN_new(); ++ x509->e = BN_new(); ++ x509->sig = BN_new(); + +-void cli_crt_init(cli_crt *x509) ++ if (!x509->n || !x509->e || !x509->sig) { ++ BN_free(x509->n); ++ BN_free(x509->e); ++ BN_free(x509->sig); ++ ++ x509->n = NULL; ++ x509->e = NULL; ++ x509->sig = NULL; ++ return -1; ++ } ++ return 0; ++} ++ ++int cli_crt_init(cli_crt *x509) + { + memset(x509, 0, sizeof(*x509)); +- +- // FP_INIT_MULTI is a memset for each and cannot fail. +- FP_INIT_MULTI(&x509->n, &x509->e, &x509->sig); ++ return cli_crt_init_fps(x509); + } + + void cli_crt_clear(cli_crt *x509) + { +- FP_CLEAR_MULTI(&x509->n, &x509->e, &x509->sig); ++ BN_free(x509->n); ++ BN_free(x509->e); ++ BN_free(x509->sig); ++ x509->n = NULL; ++ x509->e = NULL; ++ x509->sig = NULL; + } + + /* Look for an existing certificate in the trust store `m`. 
This search allows +@@ -118,7 +137,7 @@ cli_crt *crtmgr_trust_list_lookup(crtmgr *m, cli_crt *x509, int crb_crts_only) + if (x509->hashtype != i->hashtype || + memcmp(x509->issuer, i->issuer, sizeof(i->issuer)) || + x509->ignore_serial != i->ignore_serial || +- fp_cmp(&x509->e, &i->e)) { ++ BN_cmp(x509->e, i->e)) { + continue; + } + } +@@ -135,7 +154,7 @@ cli_crt *crtmgr_trust_list_lookup(crtmgr *m, cli_crt *x509, int crb_crts_only) + (i->codeSign | x509->codeSign) == i->codeSign && + (i->timeSign | x509->timeSign) == i->timeSign && + !memcmp(x509->subject, i->subject, sizeof(i->subject)) && +- !fp_cmp(&x509->n, &i->n)) { ++ !BN_cmp(x509->n, i->n)) { + return i; + } + } +@@ -166,7 +185,7 @@ cli_crt *crtmgr_block_list_lookup(crtmgr *m, cli_crt *x509) + + if (!i->isBlocked || + memcmp(i->subject, x509->subject, sizeof(i->subject)) || +- fp_cmp(&x509->n, &i->n)) { ++ BN_cmp(x509->n, i->n)) { + continue; + } + +@@ -191,37 +210,51 @@ cli_crt *crtmgr_lookup(crtmgr *m, cli_crt *x509) + } + } + +-int crtmgr_add(crtmgr *m, cli_crt *x509) ++bool crtmgr_add(crtmgr *m, cli_crt *x509) + { +- cli_crt *i; ++ bool failed = true; ++ cli_crt *i = NULL; + + if (x509->isBlocked) { + if (crtmgr_block_list_lookup(m, x509)) { + cli_dbgmsg("crtmgr_add: duplicate blocked certificate detected - not adding\n"); +- return 0; ++ failed = false; ++ goto done; + } + } else { + if (crtmgr_trust_list_lookup(m, x509, 0)) { + cli_dbgmsg("crtmgr_add: duplicate trusted certificate detected - not adding\n"); +- return 0; ++ failed = false; ++ goto done; + } + } + + i = cli_malloc(sizeof(*i)); +- if (!i) +- return 1; ++ if (i == NULL) { ++ goto done; ++ } + +- // FP_INIT_MULTI is a memset for each and cannot fail. 
+- FP_INIT_MULTI(&i->n, &i->e, &i->sig); ++ if (cli_crt_init_fps(i) < 0) { ++ goto done; ++ } + +- fp_copy(&x509->n, &i->n); +- fp_copy(&x509->e, &i->e); +- fp_copy(&x509->sig, &i->sig); ++ if (!BN_copy(i->n, x509->n)) { ++ goto done; ++ } ++ if (!BN_copy(i->e, x509->e)) { ++ goto done; ++ } ++ if (!BN_copy(i->sig, x509->sig)) { ++ goto done; ++ } + +- if ((x509->name)) ++ if (x509->name) { + i->name = strdup(x509->name); +- else ++ if (!i->name) ++ goto done; ++ } else { + i->name = NULL; ++ } + + memcpy(i->raw_subject, x509->raw_subject, sizeof(i->raw_subject)); + memcpy(i->raw_issuer, x509->raw_issuer, sizeof(i->raw_issuer)); +@@ -240,12 +273,23 @@ int crtmgr_add(crtmgr *m, cli_crt *x509) + i->isBlocked = x509->isBlocked; + i->next = m->crts; + i->prev = NULL; +- if (m->crts) ++ if (m->crts) { + m->crts->prev = i; ++ } + m->crts = i; + + m->items++; +- return 0; ++ ++ failed = false; ++ i = NULL; ++ ++done: ++ if (i != NULL) { ++ cli_crt_clear(i); ++ free(i); ++ } ++ ++ return failed; + } + + void crtmgr_init(crtmgr *m) +@@ -281,12 +325,133 @@ void crtmgr_free(crtmgr *m) + crtmgr_del(m, m->crts); + } + +-static int crtmgr_rsa_verify(cli_crt *x509, fp_int *sig, cli_crt_hashtype hashtype, const uint8_t *refhash) ++static cl_error_t _padding_check_PKCS1_type_1(uint8_t **to, int *tlen, ++ uint8_t *from, unsigned int flen, ++ unsigned int num) + { +- int keylen = fp_unsigned_bin_size(&x509->n), siglen = fp_unsigned_bin_size(sig); +- int ret, j, objlen, hashlen; +- uint8_t d[513]; +- fp_int x; ++ int i, j; ++ unsigned char *p; ++ ++ p = from; ++ ++ /* ++ * The format is ++ * 00 || 01 || PS || 00 || D ++ * PS - padding string, at least 8 bytes of FF ++ * D - data. ++ */ ++ ++ if (num < 11) /* RSA_PKCS1_PADDING_SIZE */ ++ return CL_EPARSE; ++ ++ /* Accept inputs with and without the leading 0-byte. 
*/ ++ if (num == flen) { ++ if ((*p++) != 0x00) { ++ cli_dbgmsg("%s: Bad padding\n", __func__); ++ return CL_EPARSE; ++ } ++ flen--; ++ } ++ ++ if ((num != (flen + 1)) || (*(p++) != 0x01)) { ++ cli_dbgmsg("%s: Bad block type\n", __func__); ++ return CL_EPARSE; ++ } ++ ++ /* scan over padding data */ ++ j = flen - 1; /* one for type. */ ++ for (i = 0; i < j; i++) { ++ if (*p != 0xff) { /* should decrypt to 0xff */ ++ if (*p == 0) { ++ p++; ++ break; ++ } else { ++ cli_dbgmsg("%s: Bad header\n", __func__); ++ return CL_EPARSE; ++ } ++ } ++ p++; ++ } ++ ++ if (i == j) { ++ cli_dbgmsg("%s: Bad header\n", __func__); ++ return CL_EPARSE; ++ } ++ ++ if (i < 8) { ++ cli_dbgmsg("%s: Bad padding\n", __func__); ++ return CL_EPARSE; ++ } ++ i++; /* Skip over the '\0' */ ++ j -= i; ++ *tlen = j; ++ *to = p; ++ ++ return CL_SUCCESS; ++} ++ ++static cl_error_t crtmgr_get_recov_data(BIGNUM *sig, cli_crt *x509, ++ uint8_t **buffer, uint8_t **payload, ++ int *payload_len) ++{ ++ BN_CTX *bnctx; ++ int pad_size; ++ int keylen; ++ uint8_t *d; ++ BIGNUM *x; ++ cl_error_t ret; ++ ++ *buffer = NULL; ++ *payload = NULL; ++ *payload_len = 0; ++ ret = CL_ERROR; ++ ++ keylen = BN_num_bytes(x509->n); ++ bnctx = BN_CTX_new(); ++ if (!bnctx) ++ goto done; ++ ++ x = BN_new(); ++ if (!x) ++ goto done; ++ ++ MALLOC(d, keylen); ++ ++ if (!BN_mod_exp(x, sig, x509->e, x509->n, bnctx)) { ++ cli_warnmsg("crtmgr_rsa_verify: verification failed: BN_mod_exp failed.\n"); ++ goto done; ++ } ++ ++ pad_size = BN_bn2bin(x, d); ++ if (pad_size < 0) { ++ cli_dbgmsg("crtmgr_rsa_verify: buffer too small.\n"); ++ goto done; ++ } ++ ++ ret = _padding_check_PKCS1_type_1(payload, payload_len, d, pad_size, keylen); ++ if (ret != CL_SUCCESS) { ++ cli_dbgmsg("crtmgr_rsa_verify: RSA_padding_check_PKCS1_type_1() failed\n"); ++ goto done; ++ } ++ *buffer = d; ++ d = NULL; ++ ret = CL_SUCCESS; ++ ++done: ++ BN_CTX_free(bnctx); ++ BN_free(x); ++ free(d); ++ return ret; ++} ++ ++static int crtmgr_rsa_verify(cli_crt *x509, 
BIGNUM *sig, cli_crt_hashtype hashtype, const uint8_t *refhash) ++{ ++ int keylen = BN_num_bytes(x509->n), siglen = BN_num_bytes(sig); ++ int j, objlen, hashlen; ++ uint8_t *d; ++ uint8_t *buff; ++ int len; ++ cl_error_t ret; + + if (hashtype == CLI_SHA1RSA) { + hashlen = SHA1_HASH_SIZE; +@@ -303,132 +468,100 @@ static int crtmgr_rsa_verify(cli_crt *x509, fp_int *sig, cli_crt_hashtype hashty + return 1; + } + +- fp_init(&x); ++ if (MAX(keylen, siglen) - MIN(keylen, siglen) > 1) { ++ cli_dbgmsg("crtmgr_rsa_verify: keylen and siglen differ by more than one\n"); ++ return 1; ++ } ++ ++ ret = crtmgr_get_recov_data(sig, x509, &buff, &d, &len); ++ if (ret != CL_SUCCESS) ++ return 1; + + do { +- if (MAX(keylen, siglen) - MIN(keylen, siglen) > 1) { +- cli_dbgmsg("crtmgr_rsa_verify: keylen and siglen differ by more than one\n"); ++ j = 0; ++ ++ if (len <= hashlen) { ++ cli_dbgmsg("crtmgr_rsa_verify: encountered len less than hashlen\n"); + break; + } +- if ((ret = fp_exptmod(sig, &x509->e, &x509->n, &x))) { +- cli_warnmsg("crtmgr_rsa_verify: verification failed: fp_exptmod failed with %d\n", ret); ++ /* hash is asn1 der encoded */ ++ /* SEQ { SEQ { OID, NULL }, OCTET STRING */ ++ if (len < 2 || d[j] != 0x30 || d[j + 1] != len - 2) { ++ cli_dbgmsg("crtmgr_rsa_verify: unexpected hash to be ASN1 DER encoded.\n"); + break; + } +- if (fp_unsigned_bin_size(&x) != keylen - 1) { +- cli_dbgmsg("crtmgr_rsa_verify: keylen-1 doesn't match expected size of exptmod result\n"); +- break; +- } +- if (((unsigned int)fp_unsigned_bin_size(&x)) > sizeof(d)) { +- cli_dbgmsg("crtmgr_rsa_verify: exptmod result would overrun working buffer\n"); ++ len -= 2; ++ j += 2; ++ ++ if (len < 2 || d[j] != 0x30) { ++ cli_dbgmsg("crtmgr_rsa_verify: expected SEQUENCE at beginning of cert AlgorithmIdentifier\n"); + break; + } + +- fp_to_unsigned_bin(&x, d); ++ objlen = d[j + 1]; + +- if (*d != 1) { /* block type 1 */ +- cli_dbgmsg("crtmgr_rsa_verify: expected block type 1 at d[0]\n"); ++ len -= 2; ++ j += 2; 
++ if (len < objlen) { ++ cli_dbgmsg("crtmgr_rsa_verify: key length mismatch in ASN1 DER hash encoding\n"); + break; + } +- +- keylen -= 1; /* 0xff padding */ +- for (j = 1; j < keylen - 2; j++) +- if (d[j] != 0xff) +- break; +- if (j == keylen - 2) { +- cli_dbgmsg("crtmgr_rsa_verify: only encountered 0xFF padding parsing cert\n"); +- break; +- } +- if (d[j] != 0) { /* 0x00 separator */ +- cli_dbgmsg("crtmgr_rsa_verify: expected 0x00 separator\n"); +- break; +- } +- +- j++; +- keylen -= j; /* asn1 size */ +- +- if (keylen < hashlen) { +- cli_dbgmsg("crtmgr_rsa_verify: encountered keylen less than hashlen\n"); +- break; +- } +- if (keylen > hashlen) { +- /* hash is asn1 der encoded */ +- /* SEQ { SEQ { OID, NULL }, OCTET STRING */ +- if (keylen < 2 || d[j] != 0x30 || d[j + 1] + 2 != keylen) { +- cli_dbgmsg("crtmgr_rsa_verify: unexpected hash to be ASN1 DER encoded\n"); ++ if (objlen == 9) { ++ // Check for OID type indicating a length of 5, OID_sha1, and the NULL type/value ++ if (hashtype != CLI_SHA1RSA || memcmp(&d[j], "\x06\x05" OID_sha1 "\x05\x00", 9)) { ++ cli_errmsg("crtmgr_rsa_verify: FIXME ACAB - CRYPTO MISSING?\n"); + break; + } +- keylen -= 2; +- j += 2; +- +- if (keylen < 2 || d[j] != 0x30) { +- cli_dbgmsg("crtmgr_rsa_verify: expected SEQUENCE at beginning of cert AlgorithmIdentifier\n"); ++ } else if (objlen == 12) { ++ // Check for OID type indicating a length of 8, OID_md5, and the NULL type/value ++ if (hashtype != CLI_MD5RSA || memcmp(&d[j], "\x06\x08" OID_md5 "\x05\x00", 12)) { ++ cli_errmsg("crtmgr_rsa_verify: FIXME ACAB - CRYPTO MISSING?\n"); + break; + } +- +- objlen = d[j + 1]; +- +- keylen -= 2; +- j += 2; +- if (keylen < objlen) { +- cli_dbgmsg("crtmgr_rsa_verify: key length mismatch in ASN1 DER hash encoding\n"); +- break; +- } +- if (objlen == 9) { +- // Check for OID type indicating a length of 5, OID_sha1, and the NULL type/value +- if (hashtype != CLI_SHA1RSA || memcmp(&d[j], "\x06\x05" OID_sha1 "\x05\x00", 9)) { +- 
cli_errmsg("crtmgr_rsa_verify: FIXME ACAB - CRYPTO MISSING?\n"); ++ } else if (objlen == 13) { ++ if (hashtype == CLI_SHA256RSA) { ++ // Check for OID type indicating a length of 9, OID_sha256, and the NULL type/value ++ if (0 != memcmp(&d[j], "\x06\x09" OID_sha256 "\x05\x00", 13)) { ++ cli_dbgmsg("crtmgr_rsa_verify: invalid AlgorithmIdentifier block for SHA256 hash\n"); + break; + } +- } else if (objlen == 12) { +- // Check for OID type indicating a length of 8, OID_md5, and the NULL type/value +- if (hashtype != CLI_MD5RSA || memcmp(&d[j], "\x06\x08" OID_md5 "\x05\x00", 12)) { +- cli_errmsg("crtmgr_rsa_verify: FIXME ACAB - CRYPTO MISSING?\n"); ++ ++ } else if (hashtype == CLI_SHA384RSA) { ++ // Check for OID type indicating a length of 9, OID_sha384, and the NULL type/value ++ if (0 != memcmp(&d[j], "\x06\x09" OID_sha384 "\x05\x00", 13)) { ++ cli_dbgmsg("crtmgr_rsa_verify: invalid AlgorithmIdentifier block for SHA384 hash\n"); + break; + } +- } else if (objlen == 13) { +- if (hashtype == CLI_SHA256RSA) { +- // Check for OID type indicating a length of 9, OID_sha256, and the NULL type/value +- if (0 != memcmp(&d[j], "\x06\x09" OID_sha256 "\x05\x00", 13)) { +- cli_dbgmsg("crtmgr_rsa_verify: invalid AlgorithmIdentifier block for SHA256 hash\n"); +- break; +- } + +- } else if (hashtype == CLI_SHA384RSA) { +- // Check for OID type indicating a length of 9, OID_sha384, and the NULL type/value +- if (0 != memcmp(&d[j], "\x06\x09" OID_sha384 "\x05\x00", 13)) { +- cli_dbgmsg("crtmgr_rsa_verify: invalid AlgorithmIdentifier block for SHA384 hash\n"); +- break; +- } +- +- } else if (hashtype == CLI_SHA512RSA) { +- // Check for OID type indicating a length of 9, OID_sha512, and the NULL type/value +- if (0 != memcmp(&d[j], "\x06\x09" OID_sha512 "\x05\x00", 13)) { +- cli_dbgmsg("crtmgr_rsa_verify: invalid AlgorithmIdentifier block for SHA512 hash\n"); +- break; +- } +- +- } else { +- cli_errmsg("crtmgr_rsa_verify: FIXME ACAB - CRYPTO MISSING?\n"); ++ } else if (hashtype == 
CLI_SHA512RSA) { ++ // Check for OID type indicating a length of 9, OID_sha512, and the NULL type/value ++ if (0 != memcmp(&d[j], "\x06\x09" OID_sha512 "\x05\x00", 13)) { ++ cli_dbgmsg("crtmgr_rsa_verify: invalid AlgorithmIdentifier block for SHA512 hash\n"); + break; + } ++ + } else { + cli_errmsg("crtmgr_rsa_verify: FIXME ACAB - CRYPTO MISSING?\n"); + break; + } +- +- keylen -= objlen; +- j += objlen; +- if (keylen < 2 || d[j] != 0x04 || d[j + 1] != hashlen) { +- cli_dbgmsg("crtmgr_rsa_verify: hash length mismatch in ASN1 DER hash encoding\n"); +- break; +- } +- keylen -= 2; +- j += 2; +- if (keylen != hashlen) { +- cli_dbgmsg("crtmgr_rsa_verify: extra data in the ASN1 DER hash encoding\n"); +- break; +- } ++ } else { ++ cli_errmsg("crtmgr_rsa_verify: FIXME ACAB - CRYPTO MISSING?\n"); ++ break; + } ++ ++ len -= objlen; ++ j += objlen; ++ if (len < 2 || d[j] != 0x04 || d[j + 1] != hashlen) { ++ cli_dbgmsg("crtmgr_rsa_verify: hash length mismatch in ASN1 DER hash encoding\n"); ++ break; ++ } ++ j += 2; ++ len -= 2; ++ if (len != hashlen) { ++ cli_dbgmsg("crtmgr_rsa_verify: extra data in the ASN1 DER hash encoding\n"); ++ break; ++ } ++ + if (memcmp(&d[j], refhash, hashlen)) { + // This is a common error case if we are using crtmgr_rsa_verify to + // determine whether we've found the right issuer certificate based +@@ -438,10 +571,12 @@ static int crtmgr_rsa_verify(cli_crt *x509, fp_int *sig, cli_crt_hashtype hashty + break; + } + ++ free(buff); + return 0; + + } while (0); + ++ free(buff); + return 1; + } + +@@ -469,7 +604,7 @@ cli_crt *crtmgr_verify_crt(crtmgr *m, cli_crt *x509) + if (i->certSign && + !i->isBlocked && + !memcmp(i->subject, x509->issuer, sizeof(i->subject)) && +- !crtmgr_rsa_verify(i, &x509->sig, x509->hashtype, x509->tbshash)) { ++ !crtmgr_rsa_verify(i, x509->sig, x509->hashtype, x509->tbshash)) { + int curscore; + if ((x509->codeSign & i->codeSign) == x509->codeSign && (x509->timeSign & i->timeSign) == x509->timeSign) + return i; +@@ -493,16 
+628,18 @@ cli_crt *crtmgr_verify_crt(crtmgr *m, cli_crt *x509) + cli_crt *crtmgr_verify_pkcs7(crtmgr *m, const uint8_t *issuer, const uint8_t *serial, const void *signature, unsigned int signature_len, cli_crt_hashtype hashtype, const uint8_t *refhash, cli_vrfy_type vrfytype) + { + cli_crt *i; +- fp_int sig; ++ BIGNUM *sig; + + if (signature_len < 1024 / 8 || signature_len > 4096 / 8 + 1) { + cli_dbgmsg("crtmgr_verify_pkcs7: unsupported sig len: %u\n", signature_len); + return NULL; + } + +- fp_init(&sig); ++ sig = BN_new(); ++ if (!sig) ++ return NULL; + +- fp_read_unsigned_bin(&sig, signature, signature_len); ++ BN_bin2bn(signature, signature_len, sig); + + for (i = m->crts; i; i = i->next) { + if (vrfytype == VRFY_CODE && !i->codeSign) +@@ -511,13 +648,13 @@ cli_crt *crtmgr_verify_pkcs7(crtmgr *m, const uint8_t *issuer, const uint8_t *se + continue; + if (!memcmp(i->issuer, issuer, sizeof(i->issuer)) && + !memcmp(i->serial, serial, sizeof(i->serial))) { +- if (!crtmgr_rsa_verify(i, &sig, hashtype, refhash)) { ++ if (!crtmgr_rsa_verify(i, sig, hashtype, refhash)) { + break; + } + cli_dbgmsg("crtmgr_verify_pkcs7: found cert with matching issuer and serial but RSA verification failed\n"); + } + } +- ++ BN_free(sig); + return i; + } + +diff --git a/libclamav/crtmgr.h b/libclamav/crtmgr.h +index eafd820..c8009578 100644 +--- a/libclamav/crtmgr.h ++++ b/libclamav/crtmgr.h +@@ -23,8 +23,8 @@ + #define __CRTMGR_H + + #include +- +-#include "bignum.h" ++#include ++#include + + typedef enum { CLI_HASHTYPE_ANY, /* used by crts added from .CRB rules */ + CLI_SHA1RSA, +@@ -63,9 +63,9 @@ typedef struct cli_crt_t { + * so it must have at least enough space for the largest hash in + * cli_crt_hashtype */ + uint8_t tbshash[SHA512_HASH_SIZE]; +- fp_int n; +- fp_int e; +- fp_int sig; ++ BIGNUM *n; ++ BIGNUM *e; ++ BIGNUM *sig; + time_t not_before; + time_t not_after; + cli_crt_hashtype hashtype; +@@ -82,11 +82,11 @@ typedef struct { + unsigned int items; + } crtmgr; + +-void 
cli_crt_init(cli_crt *x509); ++int cli_crt_init(cli_crt *x509); + void cli_crt_clear(cli_crt *x509); + void crtmgr_init(crtmgr *m); + void crtmgr_free(crtmgr *m); +-int crtmgr_add(crtmgr *m, cli_crt *x509); ++bool crtmgr_add(crtmgr *m, cli_crt *x509); + cli_crt *crtmgr_lookup(crtmgr *m, cli_crt *x509); + cli_crt *crtmgr_block_list_lookup(crtmgr *m, cli_crt *x509); + cli_crt *crtmgr_trust_list_lookup(crtmgr *m, cli_crt *x509, int crb_crts_only); +diff --git a/libclamav/dsig.c b/libclamav/dsig.c +index c8825b2..59303f9 100644 +--- a/libclamav/dsig.c ++++ b/libclamav/dsig.c +@@ -30,12 +30,12 @@ + #include + #include + #include ++#include + + #include "clamav.h" + #include "others.h" + #include "dsig.h" + #include "str.h" +-#include "bignum.h" + + #ifndef _WIN32 + #include +@@ -81,37 +81,83 @@ static char cli_ndecode(unsigned char value) + return -1; + } + +-static unsigned char *cli_decodesig(const char *sig, unsigned int plen, fp_int e, fp_int n) ++static unsigned char *cli_decodesig(const char *sig, unsigned int plen, BIGNUM *e, BIGNUM *n) + { + int i, slen = strlen(sig), dec; +- unsigned char *plain; +- fp_int r, p, c; ++ unsigned char *plain = NULL, *ret_sig = NULL; ++ BIGNUM *r = NULL, *p = NULL, *c = NULL; ++ BN_CTX *bn_ctx; ++ unsigned int bn_bytes; ++ ; + +- fp_init(&r); +- fp_init(&c); ++ r = BN_new(); ++ if (!r) { ++ goto done; ++ } ++ ++ p = BN_new(); ++ if (!p) { ++ goto done; ++ } ++ ++ c = BN_new(); ++ if (!c) { ++ goto done; ++ } ++ ++ bn_ctx = BN_CTX_new(); ++ if (!bn_ctx) { ++ goto done; ++ } ++ ++ BN_zero(c); + for (i = 0; i < slen; i++) { + if ((dec = cli_ndecode(sig[i])) < 0) { +- return NULL; ++ goto done; ++ } ++ if (!BN_set_word(r, dec)) { ++ goto done; ++ } ++ if (!BN_lshift(r, r, 6 * i)) { ++ goto done; + } +- fp_set(&r, dec); +- fp_mul_2d(&r, 6 * i, &r); +- fp_add(&r, &c, &c); +- } + +- plain = (unsigned char *)cli_calloc(plen + 1, sizeof(unsigned char)); ++ if (!BN_add(c, c, r)) { ++ goto done; ++ } ++ } ++ if (!BN_mod_exp(p, c, e, n, 
bn_ctx)) { ++ goto done; ++ } ++ bn_bytes = BN_num_bytes(p); ++ /* Sometimes the size of the resulting BN (128) is larger than the expected ++ * length (16). The result does not match in this case. Instead of ++ * allocating memory and filling it, we fail early. ++ */ ++ if (plen < bn_bytes) { ++ cli_errmsg("cli_decodesig: Resulting signature too large (%d vs %d).\n", ++ bn_bytes, plen); ++ goto done; ++ } ++ plain = cli_calloc(plen, sizeof(unsigned char)); + if (!plain) { + cli_errmsg("cli_decodesig: Can't allocate memory for 'plain'\n"); +- return NULL; ++ goto done; + } +- fp_init(&p); +- fp_exptmod(&c, &e, &n, &p); /* plain = cipher^e mod n */ +- fp_set(&c, 256); +- for (i = plen - 1; i >= 0; i--) { /* reverse */ +- fp_div(&p, &c, &p, &r); +- plain[i] = MP_GET(&r); ++ if (!BN_bn2bin(p, plain)) { ++ goto done; + } + +- return plain; ++ ret_sig = plain; ++ plain = NULL; ++ ++done: ++ BN_free(r); ++ BN_free(p); ++ BN_free(c); ++ BN_CTX_free(bn_ctx); ++ free(plain); ++ return ret_sig; + } + + char *cli_getdsig(const char *host, const char *user, const unsigned char *data, unsigned int datalen, unsigned short mode) +@@ -228,41 +274,55 @@ char *cli_getdsig(const char *host, const char *user, const unsigned char *data, + return strdup(pt); + } + +-int cli_versig(const char *md5, const char *dsig) ++cl_error_t cli_versig(const char *md5, const char *dsig) + { +- fp_int n, e; +- char *pt, *pt2; ++ BIGNUM *n = NULL, *e = NULL; ++ char *pt = NULL, *pt2 = NULL; ++ int ret; ++ ++ ret = CL_EMEM; ++ n = BN_new(); ++ if (!n) ++ goto done; ++ ++ e = BN_new(); ++ if (!e) ++ goto done; ++ ++ ret = CL_EVERIFY; ++ if (!BN_dec2bn(&e, CLI_ESTR)) ++ goto done; ++ ++ if (!BN_dec2bn(&n, CLI_NSTR)) ++ goto done; + + if (strlen(md5) != 32 || !isalnum(md5[0])) { + /* someone is trying to fool us with empty/malformed MD5 ? 
*/ + cli_errmsg("SECURITY WARNING: MD5 basic test failure.\n"); +- return CL_EVERIFY; ++ goto done; + } + +- fp_init(&n); +- fp_read_radix(&n, CLI_NSTR, 10); +- fp_init(&e); +- fp_read_radix(&e, CLI_ESTR, 10); +- +- if (!(pt = (char *)cli_decodesig(dsig, 16, e, n))) { +- return CL_EVERIFY; +- } ++ if (!(pt = (char *)cli_decodesig(dsig, 16, e, n))) ++ goto done; + + pt2 = cli_str2hex(pt, 16); +- free(pt); + + cli_dbgmsg("cli_versig: Decoded signature: %s\n", pt2); + + if (strncmp(md5, pt2, 32)) { + cli_dbgmsg("cli_versig: Signature doesn't match.\n"); +- free(pt2); +- return CL_EVERIFY; ++ goto done; + } + +- free(pt2); +- + cli_dbgmsg("cli_versig: Digital signature is correct.\n"); +- return CL_SUCCESS; ++ ret = CL_SUCCESS; ++ ++done: ++ free(pt); ++ free(pt2); ++ BN_free(n); ++ BN_free(e); ++ return ret; + } + + #define HASH_LEN 32 +@@ -275,21 +335,39 @@ int cli_versig2(const unsigned char *sha256, const char *dsig_str, const char *n + unsigned char mask[BLK_LEN], data[BLK_LEN], final[8 + 2 * HASH_LEN], c[4]; + unsigned int i, rounds; + void *ctx; +- fp_int n, e; ++ BIGNUM *n, *e; ++ int ret; + +- fp_init(&e); +- fp_read_radix(&e, e_str, 10); +- fp_init(&n); +- fp_read_radix(&n, n_str, 10); ++ n = BN_new(); ++ e = BN_new(); ++ ++ if (!n || !e) { ++ ret = CL_EMEM; ++ goto done; ++ } ++ ++ ret = CL_EVERIFY; ++ if (!BN_dec2bn(&e, e_str)) ++ goto done; ++ ++ if (!BN_dec2bn(&n, n_str)) ++ goto done; + + decoded = cli_decodesig(dsig_str, PAD_LEN, e, n); +- if (!decoded) +- return CL_EVERIFY; ++ if (!decoded) { ++ ret = CL_EVERIFY; ++ goto done; ++ } + + if (decoded[PAD_LEN - 1] != 0xbc) { + free(decoded); +- return CL_EVERIFY; ++ ret = CL_EVERIFY; + } ++ BN_free(n); ++ BN_free(e); ++ ++ n = NULL; ++ e = NULL; + + memcpy(mask, decoded, BLK_LEN); + memcpy(digest2, &decoded[BLK_LEN], HASH_LEN); +@@ -337,4 +415,9 @@ int cli_versig2(const unsigned char *sha256, const char *dsig_str, const char *n + cl_finish_hash(ctx, digest1); + + return memcmp(digest1, digest2, HASH_LEN) ? 
CL_EVERIFY : CL_SUCCESS; ++ ++done: ++ BN_free(n); ++ BN_free(e); ++ return ret; + } +diff --git a/libclamav/dsig.h b/libclamav/dsig.h +index cb11c65..36a3176 100644 +--- a/libclamav/dsig.h ++++ b/libclamav/dsig.h +@@ -29,7 +29,7 @@ + #include "clamav-config.h" + #endif + +-int cli_versig(const char *md5, const char *dsig); ++cl_error_t cli_versig(const char *md5, const char *dsig); + int cli_versig2(const unsigned char *sha256, const char *dsig_str, const char *n_str, const char *e_str); + + /** +diff --git a/libclamav/readdb.c b/libclamav/readdb.c +index b8e7d78..5b89970 100644 +--- a/libclamav/readdb.c ++++ b/libclamav/readdb.c +@@ -3310,9 +3310,7 @@ static int cli_loadcrt(FILE *fs, struct cl_engine *engine, struct cli_dbio *dbio + char *tokens[CRT_TOKENS + 1]; + size_t line = 0, tokens_count; + cli_crt ca; +- int ret = CL_SUCCESS; +- char *pubkey = NULL; +- const uint8_t exp[] = "\x01\x00\x01"; ++ int ret = CL_SUCCESS; + + if (!(engine->dconf->pe & PE_CONF_CERTS)) { + cli_dbgmsg("cli_loadcrt: Ignoring .crb sigs due to DCONF configuration\n"); +@@ -3324,7 +3322,10 @@ static int cli_loadcrt(FILE *fs, struct cl_engine *engine, struct cli_dbio *dbio + return ret; + } + +- cli_crt_init(&ca); ++ if (cli_crt_init(&ca) < 0) { ++ cli_dbgmsg("cli_loadcrt: No mem for CA init.\n"); ++ return CL_EMEM; ++ } + memset(ca.issuer, 0xca, sizeof(ca.issuer)); + + while (cli_dbgets(buffer, FILEBUFF, fs, dbio)) { +@@ -3402,16 +3403,17 @@ static int cli_loadcrt(FILE *fs, struct cl_engine *engine, struct cli_dbio *dbio + goto done; + } + +- pubkey = cli_hex2str(tokens[4]); +- if (!pubkey) { ++ if (BN_hex2bn(&ca.n, tokens[4]) == 0) { + cli_errmsg("cli_loadcrt: line %u: Cannot convert public key to binary string\n", (unsigned int)line); + ret = CL_EMALFDB; + goto done; + } + +- fp_read_unsigned_bin(&(ca.n), (const unsigned char *)pubkey, strlen(tokens[4]) / 2); +- +- fp_read_unsigned_bin(&(ca.e), exp, sizeof(exp) - 1); ++ /* Set the RSA exponent of 65537 */ ++ if (!BN_set_word(ca.e, 
65537)) { ++ cli_errmsg("cli_loadcrt: Cannot set the exponent.\n"); ++ goto done; ++ } + + switch (tokens[6][0]) { + case '1': +@@ -3463,13 +3465,9 @@ static int cli_loadcrt(FILE *fs, struct cl_engine *engine, struct cli_dbio *dbio + + ca.hashtype = CLI_HASHTYPE_ANY; + crtmgr_add(&(engine->cmgr), &ca); +- +- FREE(pubkey); + } + + done: +- FREE(pubkey); +- + cli_dbgmsg("Number of certs: %d\n", engine->cmgr.items); + cli_crt_clear(&ca); + return ret; +diff --git a/libclamav/textnorm.c b/libclamav/textnorm.c +index 6c45530..90db3e6 100644 +--- a/libclamav/textnorm.c ++++ b/libclamav/textnorm.c +@@ -30,7 +30,6 @@ + #include + #include "clamav.h" + #include "textnorm.h" +-#include "bignum.h" + + int text_normalize_init(struct text_norm_state *state, unsigned char *out, size_t out_len) + { +diff --git a/libclamav/xdp.c b/libclamav/xdp.c +index f0e2fdd..97eeecf 100644 +--- a/libclamav/xdp.c ++++ b/libclamav/xdp.c +@@ -52,7 +52,6 @@ + #include "scanners.h" + #include "conv.h" + #include "xdp.h" +-#include "bignum.h" + #include "filetypes.h" + + static char *dump_xdp(cli_ctx *ctx, const char *start, size_t sz); diff -Nru clamav-1.0.1+dfsg/debian/patches/libclamav-pe-Use-endian-wrapper-in-more-places.patch clamav-1.0.2+dfsg/debian/patches/libclamav-pe-Use-endian-wrapper-in-more-places.patch --- clamav-1.0.1+dfsg/debian/patches/libclamav-pe-Use-endian-wrapper-in-more-places.patch 2023-02-26 16:35:36.000000000 +0000 +++ clamav-1.0.2+dfsg/debian/patches/libclamav-pe-Use-endian-wrapper-in-more-places.patch 2023-08-27 09:35:11.000000000 +0000 @@ -1,4 +1,4 @@ -From 2abd896f6c7c91cc13f06cb10beedb62380d24d5 Mon Sep 17 00:00:00 2001 +From 47db1ecb0cb356d7f08f515965859221e0dca1af Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 6 Jan 2023 21:42:30 +0100 Subject: libclamav/pe: Use endian wrapper in more places. 
diff -Nru clamav-1.0.1+dfsg/debian/patches/series clamav-1.0.2+dfsg/debian/patches/series --- clamav-1.0.1+dfsg/debian/patches/series 2023-02-26 16:35:36.000000000 +0000 +++ clamav-1.0.2+dfsg/debian/patches/series 2023-08-27 09:35:11.000000000 +0000 @@ -1,9 +1,9 @@ Change-paths-in-sample-conf-file-to-match-Debian.patch -clamd_dont_depend_on_clamav_demon_socket.patch -Use-either-system-s-tomfastmath-library-or-the-built.patch cargo-Remove-windows-referenfes.patch libclamav-Sort-libclamav.map-and-libfreshclam.map.patch libclamav-Add-missing-symbols.patch Add-a-version-script-for-libclamav-and-libfreshclam.patch libclamav-pe-Use-endian-wrapper-in-more-places.patch Add-an-option-to-avoid-setting-RPATH-on-unix-systems.patch +libclamav-Use-OpenSSL-BN-instead-tomfastmath.patch +Remove-bundled-tomfastmath-library.patch diff -Nru clamav-1.0.1+dfsg/debian/rules clamav-1.0.2+dfsg/debian/rules --- clamav-1.0.1+dfsg/debian/rules 2023-02-26 16:35:36.000000000 +0000 +++ clamav-1.0.2+dfsg/debian/rules 2023-08-27 09:35:11.000000000 +0000 @@ -160,3 +160,9 @@ # Don't compress the example configuration files. 
override_dh_compress: dh_compress -Xexamples + +override_dh_installsystemd: +ifneq (linux, $(DEB_HOST_ARCH_OS)) + dh_installsystemd --name clamav-clamonacc --no-enable --no-start +endif + dh_installsystemd --name clamav-daemon diff -Nru clamav-1.0.1+dfsg/debian/split-tarball.sh clamav-1.0.2+dfsg/debian/split-tarball.sh --- clamav-1.0.1+dfsg/debian/split-tarball.sh 2023-02-17 22:01:42.000000000 +0000 +++ clamav-1.0.2+dfsg/debian/split-tarball.sh 2023-08-27 09:35:11.000000000 +0000 @@ -36,9 +36,6 @@ cd clamav-$VERS+dfsg # remove win32 stuff, doesn't make sense to ship it rm -rf win32 -sed -i 's/ win32//' Makefile.am Makefile.in -sed -i 's@libclammspack/config.h:libclammspack/config.h.in@@' configure.ac -find shared -name '*.la' -o -name '*.lo' -o -name Makefile -exec rm {} \; # cleanup llvm set -- libclamav/c++/llvm/utils/lit/lit/*.pyc if [ -f "$1" ] ; then @@ -52,47 +49,18 @@ # remove llvm, we build with the system version #rm -rf libclamav/c++/llvm cp -R libclamunrar_iface $UNRARDIR -cp libclamav/Makefile.am $UNRARDIR/libclamunrar_iface -mv libclamunrar $UNRARDIR -cp -R m4/ $UNRARDIR -cp -R config/ $UNRARDIR -cp platform.h.in $UNRARDIR -cp clamav-types.h.in $UNRARDIR -cp COPYING{,.unrar,.LGPL} $UNRARDIR -cd ../ -tar -cJf $DFSGPKG --numeric-owner clamav-$VERS+dfsg/ -cd $UNRARDIR -echo "Preparing unrar package" -# The sed sorcery below makes sure that the AC_CONFIG_FILES in the unrar package looks -# like: -# AC_CONFIG_FILES([ -# libclamunrar_iface/Makefile -# Makefile -# platform.h -# ]) -# It also removes ltdl, and renames the AC_CONFIG_SRCDIR parameter to an -# existing file. - -cp $UNRAR_CONF configure.ac -cat <Makefile.am && -ACLOCAL_AMFLAGS=-I m4 -DISTCLEANFILES = target.h -SUBDIRS = libclamunrar_iface -EOF +# XXX Add the libclamunrar bits -# The complete Makefile.am from libclamav/Makefile.am is huge and we -# only need the libclamunrar pieces. If we keep everything it will -# break for instance due to missing c++ folder or something else. 
-# The UNRAR block is the first one followed by LLVM so try to remove -# everything after the LLVM block so we should have enough to get the -# complete libclamunrar compiled. -sed -i '/if ENABLE_LLVM/,$d' libclamunrar_iface/Makefile.am +cd ../ +tar -cJf $DFSGPKG --numeric-owner clamav-$VERS+dfsg/ +echo "missing clamunrar, you need to look at that." +exit 0 -autoreconf -rm -r autom4te.cache -cd .. -tar -cJf $UNRARPKG --numeric-owner libclamunrar-$VERS/ +#cd $UNRARDIR +#echo "Preparing unrar package" +#cd .. +#tar -cJf $UNRARPKG --numeric-owner libclamunrar-$VERS/ printf "Test archives?" read yes diff -Nru clamav-1.0.1+dfsg/debian/upstream/signing-key.asc clamav-1.0.2+dfsg/debian/upstream/signing-key.asc --- clamav-1.0.1+dfsg/debian/upstream/signing-key.asc 2023-02-17 22:01:42.000000000 +0000 +++ clamav-1.0.2+dfsg/debian/upstream/signing-key.asc 2023-08-27 09:35:11.000000000 +0000 @@ -1,51 +1,63 @@ -----BEGIN PGP PUBLIC KEY BLOCK----- -mQINBGBjkiwBEADgJTEabt5zCareK9pJJswGU62smrq3uOaaDhtgztj3bxRY/UGT -jypxMee1S/fGWQZQy52lFOXLud5gFC5QU8Yk+7EAsh2ZJSKtWUw8/iMxZ4vsrKVV -QQRLTqMUY16R6/8UzdIT/hD6CbgWgiXF4NH5AGleNqjkF4TXrGof0AK0veekZYJV -WWStqJR/cIiG0nxDQ87RWfeZgrULZmA8uii22po7rGGzxT0byb83dKK+7IoJ/6B/ -ZlI0PmzuJ9/Xp6Mmm//sdPEqRwedt2aGrvtdF79xYJ1tDhOVMpID0aPdURBwlliq -fyKGaIUEa1ke+Dy7sQF8i3zY7ce6PZOtbsts9xsJLvF98VhRsFy0vProPv1mVbiU -PoxxPTnyLeGUm27amIMl4NfX4a8Hdu+ExzKprqWo3Ir08HQzNt6QoFghDIpi9nm4 -k327CJzJv/g2dq5kY/KU6wFHbdH3zP7u+p9DDqKJYFebPCvwM1hMxPdLqemTsfob -kJ4iXcAXjpMqwXX9m0lyQcRHdIdc99yyCUMdPNfapLgY7rOahsS16795/5KSrCuF -h2RcoAWUjh6sGjgGIY4Hy1qQwp3t6X/L6TOhDkBDWId5bTKFR9NqrVprOVsUutbs -0TOqLyH4GXCpE9vzg8DX7FTdRiCTpbyQ7VuSxRN/vAyVRP4chrABNfvh/QARAQAB +mQINBGQPO58BEACsF0vtWepeSZRklvCG170RKuZL+9aH8U3zVVtQgDlmcboVRiFf ++fgraQCRVh8cbRM76mqqGoMT0BlwZ1OfrzpZcrNUg5uAgok51P7SoCy3zummnv4M +TadwDLEHNf/38HSnrJe196IiwMEtyuKMGDfzyjQnr357Owem+7FgT2/sU7XwWD2B ++tn/yhbw+HpJuUjdmxmEqJr/4okRSj9OSWV+EFhS9owMNK8zntwHkJzmv4ctS1Ak 
+Zryh/J3jEnPqzSJDsH729XzKpG4BxCxnybP5WuMsJuNvSlVhVko1PaSi84Dy003w +WoQIgtQHNm6i8CcetrpNCULELTU8sViwdBQXIlGjCa3N+dq1ZOErasp4QzlCVOus +iOkm1KltvbJWPfVDW0A0Z4mP19YRlQTc0jn4w9R5ROmwcLf6Co8nBD2AV8MFjVJA +E21Mfj6LfksplmYg/DEa4kCe8KYPSATq6LFSf+o96fkmnsZovOi6zZ6RtV9l4Aya +pkcvk9iO2cvJMDYJ6iA2dC8EHC2m1tt1Rs2abJqOmsUJATo7MUpK7MD7NyhVvkjJ +j5QRES25uV4OY9ck091GB+XXAf3gGf3Pi2jop1gauGoxyBqLT4SkwqsnsrFF8eEh +A8UdBmo4K6MWFaxw6JsBPpIM63Qe848RzlQRanxS2n50ZZwMLIJrI2MEFQARAQAB tDtUYWxvcyAoVGFsb3MsIENpc2NvIFN5c3RlbXMgSW5jLikgPHJlc2VhcmNoQHNv -dXJjZWZpcmUuY29tPokCPgQTAQIAKAUCYGOSLAIbAwUJA8JnAAYLCQgHAwIGFQgC -CQoLBBYCAwECHgECF4AACgkQYJsCTys+3QfbLg//eZ0yCLr957FtztVlLIHYLpJn -LIl8m+hu3KeUTIwvMoCLiw48cWqFZaJS9PTmrraSj5SKMDnAYFl4O0fhHfQiWDjb -sZ32hQni1PcqxoXqSnkXD7mXjcPH2WuNnQM5WZoAD2VmksqRT57I/K2omW/sjaVe -Nbq3GSOy8WThibswxzioDHtTPFa0/Ah2qq8OkcVJuTwCS1xkLijJc3jx/pOBHWFA -BA4VX5pwcSou/woJ+ySsgBGEo5hOsd0r7h3a0O8EiuGulHTqQt87rVWGv0JKhnub -FULr/ld8+d1zGvJL3OzFG6udjWjw3QqsLDZa94G1ksZWgqr/RgexlSYuxPW+lKUC -QkgotLaEKQC4cpBLRcJEjWyrf4IjoJvkFrUtPsVH9VStICUQATyXARNVWbnJHq3Y -qynCXSB4NZvdo9BF6Tx3FA+ZUjK4/X/UsjL/Hmv99huBctQsWL7gQCoSw9YOt4qs -/As6fgPaNpYb9woJqNMEQNmrhfnnX9PGaM5dM769/E5vF67mkhBNqVJ0+4gyrpTU -T7Pmavrc3T4aSSde8eG6zSlmW8wM5xELfK5TeTexBKGAaDV8c2BkfenRO8OvBSvr -Gz+Xp/YzO9uGUPnbMsTVtxClmzmEj/MVpvtRdEo+dbVOSy8nk3XCu7jMjpojggPv -YQ+4CZYxYpW1T2hSFxG5Ag0EYGOSLAEQAM5kdheiwStznKiaIWaO+0PBA8bAv2xG -7qW/Di85xdcH9miHZM9+lx/iZoOBC9wZC9eatV4Hcukff700a/LGZSYVDvHvdEWb -Tv6ZwvHzbxuc1Kv8cLYopRUfOAwMYOmXriMLxVmd3fcfPNsfPRqfkaZRdkm7qTbP -DeKpSL157HbUG64Eej3cOViq49Hy9L6jtfjtZVxX7OavjnEpyezG6qSIAkvD6O7J -Yg3yfkr4sa44qohq9lDfjWpoXMebu0WsIyW11hm+7KMrBMHjlNgXppu0+ryeKfQi -FjPDBd9aflnHy2e8aHef9S5349thNGzjV3TNMV6A6oAN2XQ7pgj5DTwMZtHFCjdE -HIyfGCAgQQL0/MaFzKwuw/l/m31smZgItAZXYY1xoC2gh7LTPZ/3t2VVVof4TNXD -c+pUNgY6bwPBksuhsX8qsldDr5q3jdHZsjlycpL38Z4EZNg3BqxJlVseB395ZOQ6 -FCtHGh6rpsYQZDj1QWcUyev8NHSbSNRMS2/Nn5bT3KgEWEWrmOxp3iMmunBqmnt2 -/xJ83PKRTbSKgcG+Y/+DtnleHpRueRUPC/5XX0DNznSjF10vAh4XtBKGBNaHU9Vv 
-nMXlYeJ9kCMdSs7cM4FfLFMtPkFwpDYhvQRAEwt11RV6bGo5ZCgGrHGIBlNk6ZSO -1hP15hUtkWU7ABEBAAGJAiUEGAECAA8FAmBjkiwCGwwFCQPCZwAACgkQYJsCTys+ -3QfI7Q//Sb2yotfcsG5Q2FkHRBE85su01c6pewImV9bofNhATSQ37yVHUDrchm+k -Y6Pq5Tdgg+eAMcYz2yv9JhFxJyzgI0viQrkjD7oXeRTGZ0CvzxHhTakAOADXAnYt -wmJglEBTCCbUZ968kQkdBxEaUjVWPCMyIceRr8kUfiCjX51+DLESy8b5lOBhprO6 -vDukk/rmDruIpJPhJ3f89gsp2Ry7gk7a5ENIuVEElLK6OPBZhC3dDZwsvm5CYb62 -+U/b1xtmElpgGbNJCjxvAZiJ0WN2zfBXan+SJ4I9NFUw9jvSURvDV24s4YPhkbZu -OIqQEEYF8QMZ1VJlsr7BoWIXrdKDNJbmEVyx3UiYXKD1BVXCQADPu8G8EPuo4yAf -WymJAOJbAqNF2Op6+sC7/v8Xcgc3PGGyu23cZwikfCAgV+beywTPI5+eVV5F/rpx -XOlvNxT0NOg3UOeQ9GvCbD5ZcuDzmhqso0eMABeq5K5XB12xlWNaTZsIt8Dim4uK -aKMGeB+6iygkHITbay0sMUo0dX6nT27bjX5dTBo/vnVAPYuCS6rh8ojalR1fYFKA -1zdeSaJ2EW5KmgC9yedylSbHdQ+LjSY3t/Ut4RYaekIDeGmVoQkJkL7gIAs8NOYw -G3ayr0AtmeMagAMy94NH5ufVgFk+QPmXpzS7rMLQ3Is1ZOuWNrQ= -=gazS +dXJjZWZpcmUuY29tPokCPgQTAQIAKAUCZA87nwIbAwUJA8JnAAYLCQgHAwIGFQgC +CQoLBBYCAwECHgECF4AACgkQzODf0h7Bqb8gjw/9FYbuwzBjuVCVhHfoY4rfCDoj +eh3NVaTdHIWO1yp6JSM/ny+Z3wDzZLtyQlBcnaJlerncS961iOEG2gBA3v8fZudN +JFpvRC1gxd9IEhGXIDDg+BeOAJUbY9LQTc/dnzWBB04nun20+lM/Rad2BlkQ+YSz +uRUaFsgk0lQPCSDQfoahtoap14jWFsa19aOjTXhAF1MGEDXuoCXM6ByH6wJjtz+z +QJrXvmHS4v8yh8z/pibLGV7IgNrtoW2ej4jFadzEEn/MDajI+5N3C2w5rD41L7Lm +j1uCIBe1G54fgJSstvBxZcnAj9qTF2FBBUpQ1q/ONFfUjpAGQKG2qh1UNBiOZNS3 +gDVN2T8h083WRN2gQvNJnJwXaF4Nm6zhmX4sUqE9nexUrDF8VG8xXJwPgZijaHPV +nZdgDZvQ47BKiJOUj80O9/qYyWo89pX6Rr/YmfbURhRe/kiPon9kIVFCzDDFPniJ +svICjpdkz7wZ0kUN+L7BtDQJfjFjTJPNA2nOV6l64DcdCiyutOFSz4Zf85GoT9wK +Mqv1UmpLwsq2FnF+Gpk1GLZCLprSCu3n16pr+gdRshnE93cvJbMGlP0+jcuFF5hr +Lsvujl7O81JrIjmGXrulHHpdrZQ4J2A3UpDDc60DOHG9ubnBnN7k2kQPY+9a1rzf +WPkMQKaxVo3uH1XRO/GJAhwEEAECAAYFAmQPQKgACgkQYJsCTys+3QcvuA//cuJX +LDfsGn9pWTCU83cF6eiQ5Id5FPKldyhSqYRgavgRov0fwD6ZU79dpURf+YsWxxtI +pIntn9hUgSgmdyUw+0GcAmFq6gJOQxWY2nij6X0A9Pskr2qW+WhMGKKVbYez65qw +fgGdlDFT/4nzVBGpIlRGGuOC0aT3jDhBXbp8Eusxi+5He7Kx2Chem7kCX9xBpUYS +FrujMlaMs8O1bsBW3xTWLpHhX6O6bpEY8zDfWavSAqCmzw5RtytAJWsAG1clU9AK 
+FwSKC+10ODo5VFzmRSgF727Gtuow1WnPhFM/7Cn+M+knCTm2vRz6Vz29/a6DUrZl +CbyKGPR8a9C3UG4VT8C3+fi1boZ+/trUw27YtrKp70FDy3UdgLDF2eO9B77vs35n ++hf2EipG407CGBqb8q6boOdxC0BN/Fcy30Oms4DSUTqEiqvSA/35BhyGfOmJb5tt +kMEHLPveJvilICKBMQdYHemR3mk+muzAO7+y4VOKl+rP0xXCp6y6PAiEu14lzxzI +isQu6omEJBOUiad2iZz+4OUU1Dil0YgUpNgJQyKaDUOR0MSzFU9IM5pzZJ14XkdG +6iriPEX1V9SlfZlaJDNlN11vFlVFeu02vJTcddAaHYad2tKD09GAEuZkib0ToWxz +S+4cBxojti6vMUHVSIlbov7ZMHd/WMqQUb1tSl65Ag0EZA87nwEQALkEL5rxEnv7 +rcwcF3KwcppfHTWjkTV0dyMmE/kLf9e3QnMdCaiZMypxmYipOe9Z/9G6YGH+Qujp +N0mzenNgKljs961VTbOUYTusgwTz1qFienX8lg+eYRQIpqPjisb1xGlISojI7vWO +FZT/LrxVI6Y+HLSXkZjPD7TqyefgOlP2YchmFAjC/e+rtKAZ+FLlguotvDRxl/zp +AA8LLFup8Y8+BvQIWiy6jwwAjJMiJdwBtUz1OxpMuGU/C6bWCkAAFKjhC5F9JQEI +9jHh7/cQEGabDmjIGfywj9jniJrP79hrLfuryFvo6qbw7EwirJbKpoHJwS03ei29 +Uwttw2Dn41dZ0MvjfpYwI61cE5NpvKCBJkkEho6SDXGvLABerEu3ASGlYybQOzrg +aHO9AxGXgD2tFjI0NNunVxy/0KQ+kWcdQ1p/dk/O2U6w5CfFHU68aZgAxmj7jngx +YKjs3IAUy8mwkxtyyFiLJ3E19NdB8+t0cjJMtDVtXOgmoi7HaP8RghdaitaI4q/z +ocIAWhJhN7IkzrYWJ/Bkq4j0doKmaDR8GPP3i5Keg1c1z4yGX1c9MWTMy49l5Nwl +/bUjUiIRocCc33dZCqL5KPMBdtLJOUiIG/KZoMqr6Ozxyriv4Nn/CT2/SSvatYtP +SN91kt61c2FmoBBSltiFwncbUVmB3HmDABEBAAGJAiUEGAECAA8FAmQPO58CGwwF +CQPCZwAACgkQzODf0h7Bqb/ueQ/7BofldLW0/GqvTMEDnysUB/tchWzae6LnBeur +EhIB6smOVkMiuzrRLl2/vFVmv6H1UZK2fRPpaI/3V2mg+ML5ioVVgBrg3IQxcDpY +sYiictUFXJQ9y/ygAl8zxbkE4v4BWAwk5kIFWw1q/sb3IUc07GeK16PLY0+ocPdV +vMyiV8w5wKBlkyPwdntjuJEyfU3lsIeR2iBcQe4HL1Y0/pm6Ilpn+uj2ZYlYZzhN +zBuLy9HB3it161KP/RyxWNB1AEAAx8Mh0IhHOEWLvbfjHJxkJ2GX0TgL5wa45l2a +3clP4Dw2MpLfzIHs+CxG7t6IdSvoX1+0gZPvmo9JXDsLNa7+uu/lcCUjXY9TWdvc +VIZRwlSBQQC8WnGpbkvsBDsJ2BskPWOmv0ol3aiiekJJhVT1K9M1ZwDGX1ts8hLr +mf0kCFDq0RImCg6WZAM6z3Fg/1pPGPRktJ4tmSui3GYzrVA34gTunvlqPYKCFYHA +EdUdqycz7UAroj7k3OndZGnnT2r/qKaIYF53/u+6SXM/lUSrJfwxG9eXiw80P/YW +K9VjT3CbQA74vz7pC1bxpYDas6w39DRpkYR1bn1GIhmJhK2CUj5FQla+opVN2Wmg +sk0O7hoet7RDvKpoUyBHxHOJseDQEzWc38bOxD+x0vz/MirBnLdBx8g836tgqy7h +ab6V2qU= +=X+5e -----END PGP PUBLIC KEY BLOCK----- diff -Nru 
clamav-1.0.1+dfsg/libclamav/autoit.c clamav-1.0.2+dfsg/libclamav/autoit.c --- clamav-1.0.1+dfsg/libclamav/autoit.c 2023-02-13 06:00:26.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav/autoit.c 2023-08-15 22:24:07.000000000 +0000 @@ -761,6 +761,10 @@ cli_dbgmsg("autoit: file is compressed\n"); if (cli_readint32(UNP.inputbuf) != 0x35304145) { cli_dbgmsg("autoit: bad magic or unsupported version\n"); + // Free this inputbuf and set back to NULL. + free(UNP.inputbuf); + UNP.inputbuf = NULL; + continue; } @@ -769,6 +773,10 @@ } if (cli_checklimits("autoit", ctx, UNP.usize, 0, 0) != CL_CLEAN) { + // Free this inputbuf and set back to NULL. + free(UNP.inputbuf); + UNP.inputbuf = NULL; + continue; } @@ -848,12 +856,16 @@ */ cli_dbgmsg("autoit: file is not compressed\n"); UNP.outputbuf = UNP.inputbuf; - UNP.usize = UNP.csize; + UNP.inputbuf = NULL; + + UNP.usize = UNP.csize; } if (UNP.usize < 4) { cli_dbgmsg("autoit: file is too short\n"); free(UNP.outputbuf); + UNP.outputbuf = NULL; + continue; } diff -Nru clamav-1.0.1+dfsg/libclamav/bytecode_api.h clamav-1.0.2+dfsg/libclamav/bytecode_api.h --- clamav-1.0.1+dfsg/libclamav/bytecode_api.h 2023-02-13 06:00:26.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav/bytecode_api.h 2023-08-15 22:24:07.000000000 +0000 @@ -165,6 +165,7 @@ FUNC_LEVEL_1_0 = 160, /**< LibClamAV release 1.0.0 */ FUNC_LEVEL_1_0_1 = 161, /**< LibClamAV release 1.0.1 */ + FUNC_LEVEL_1_0_2 = 162, /**< LibClamAV release 1.0.2 */ }; /** diff -Nru clamav-1.0.1+dfsg/libclamav/hfsplus.c clamav-1.0.2+dfsg/libclamav/hfsplus.c --- clamav-1.0.1+dfsg/libclamav/hfsplus.c 2023-02-13 06:00:26.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav/hfsplus.c 2023-08-15 22:24:07.000000000 +0000 @@ -1323,6 +1323,11 @@ stream.next_out = uncompressed_block; extracted_file = true; + + if (stream.avail_in > 0 && Z_STREAM_END == z_ret) { + cli_dbgmsg("hfsplus_walk_catalog: Reached end of stream even though there's still some available bytes left!\n"); + break; + } } } else { if 
(cli_writen(ofd, &block[streamBeginning ? 1 : 0], readLen - (streamBeginning ? 1 : 0)) != readLen - (streamBeginning ? 1 : 0)) { diff -Nru clamav-1.0.1+dfsg/libclamav/matcher-ac.c clamav-1.0.2+dfsg/libclamav/matcher-ac.c --- clamav-1.0.1+dfsg/libclamav/matcher-ac.c 2023-02-13 06:00:26.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav/matcher-ac.c 2023-08-15 22:24:07.000000000 +0000 @@ -2951,6 +2951,7 @@ cli_warnmsg("cli_ac_addsig: cannot use filter for trie\n"); MPOOL_FREE(root->mempool, root->filter); root->filter = NULL; + return CL_EMALFDB; } /* TODO: should this affect maxpatlen? */ diff -Nru clamav-1.0.1+dfsg/libclamav/matcher-bm.c clamav-1.0.2+dfsg/libclamav/matcher-bm.c --- clamav-1.0.1+dfsg/libclamav/matcher-bm.c 2023-02-13 06:00:26.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav/matcher-bm.c 2023-08-15 22:24:07.000000000 +0000 @@ -72,6 +72,7 @@ cli_warnmsg("cli_bm_addpatt: cannot use filter for trie\n"); MPOOL_FREE(root->mempool, root->filter); root->filter = NULL; + return CL_EMALFDB; } /* TODO: should this affect maxpatlen? 
*/ } diff -Nru clamav-1.0.1+dfsg/libclamav/readdb.c clamav-1.0.2+dfsg/libclamav/readdb.c --- clamav-1.0.1+dfsg/libclamav/readdb.c 2023-02-13 06:00:26.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav/readdb.c 2023-08-15 22:24:07.000000000 +0000 @@ -4862,9 +4862,11 @@ if (fs) fclose(fs); - if (engine->cb_sigload_progress) { - /* Let the progress callback function know how we're doing */ - (void)engine->cb_sigload_progress(engine->num_total_signatures, *signo, engine->cb_sigload_progress_ctx); + if (CL_SUCCESS == ret) { + if (engine->cb_sigload_progress) { + /* Let the progress callback function know how we're doing */ + (void)engine->cb_sigload_progress(engine->num_total_signatures, *signo, engine->cb_sigload_progress_ctx); + } } return ret; diff -Nru clamav-1.0.1+dfsg/libclamav/rtf.c clamav-1.0.2+dfsg/libclamav/rtf.c --- clamav-1.0.1+dfsg/libclamav/rtf.c 2023-02-13 06:00:26.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav/rtf.c 2023-08-15 22:24:07.000000000 +0000 @@ -168,9 +168,11 @@ /* grow stack */ struct rtf_state* states; stack->stack_size += 128; - states = cli_realloc2(stack->states, stack->stack_size * sizeof(*stack->states)); - if (!states) + states = cli_realloc(stack->states, stack->stack_size * sizeof(*stack->states)); + if (!states) { + // Realloc failed. Note that stack->states has not been freed and must still be cleaned up by the caller. 
return CL_EMEM; + } stack->states = states; } stack->states[stack->stack_cnt++] = *state; diff -Nru clamav-1.0.1+dfsg/libclamav/scanners.c clamav-1.0.2+dfsg/libclamav/scanners.c --- clamav-1.0.1+dfsg/libclamav/scanners.c 2023-02-13 06:00:26.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav/scanners.c 2023-08-15 22:24:07.000000000 +0000 @@ -1632,7 +1632,8 @@ char *hash = NULL; char path[PATH_MAX]; char filename[PATH_MAX]; - int tempfd = -1; + int tempfd = -1; + char *tempfile = NULL; if (CL_SUCCESS != (ret = uniq_get(U, "dir", 3, &hash, &hashcnt))) { cli_dbgmsg("cli_ole2_tempdir_scan_vba_new: uniq_get('dir') failed with ret code (%d)!\n", ret); @@ -1649,7 +1650,7 @@ if (CL_SUCCESS == find_file(filename, dir, path, sizeof(path))) { cli_dbgmsg("cli_ole2_tempdir_scan_vba_new: Found dir file: %s\n", path); - if ((ret = cli_vba_readdir_new(ctx, path, U, hash, hashcnt, &tempfd, has_macros)) != CL_SUCCESS) { + if ((ret = cli_vba_readdir_new(ctx, path, U, hash, hashcnt, &tempfd, has_macros, &tempfile)) != CL_SUCCESS) { // FIXME: Since we only know the stream name of the OLE2 stream, but not its path inside the // OLE2 archive, we don't know if we have the right file. The only thing we can do is // iterate all of them until one succeeds. @@ -1693,6 +1694,14 @@ close(tempfd); tempfd = -1; + + if (tempfile) { + if (!ctx->engine->keeptmp) { + remove(tempfile); + } + free(tempfile); + tempfile = NULL; + } } hashcnt--; @@ -1704,6 +1713,14 @@ tempfd = -1; } + if (tempfile) { + if (!ctx->engine->keeptmp) { + remove(tempfile); + } + free(tempfile); + tempfile = NULL; + } + return ret; } diff -Nru clamav-1.0.1+dfsg/libclamav/vba_extract.c clamav-1.0.2+dfsg/libclamav/vba_extract.c --- clamav-1.0.1+dfsg/libclamav/vba_extract.c 2023-02-13 06:00:26.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav/vba_extract.c 2023-08-15 22:24:07.000000000 +0000 @@ -358,7 +358,7 @@ * Read a VBA project in an OLE directory. * Contrary to cli_vba_readdir, this function uses the dir file to locate VBA modules. 
*/ -cl_error_t cli_vba_readdir_new(cli_ctx *ctx, const char *dir, struct uniq *U, const char *hash, uint32_t which, int *tempfd, int *has_macros) +cl_error_t cli_vba_readdir_new(cli_ctx *ctx, const char *dir, struct uniq *U, const char *hash, uint32_t which, int *tempfd, int *has_macros, char **tempfile) { cl_error_t ret = CL_SUCCESS; char fullname[1024]; @@ -367,7 +367,6 @@ size_t data_len; size_t data_offset; const char *stream_name = NULL; - char *tempfile = NULL; uint16_t codepage = CODEPAGE_ISO8859_1; unsigned i; char *mbcs_name = NULL, *utf16_name = NULL; @@ -375,7 +374,7 @@ unsigned char *module_data = NULL, *module_data_utf8 = NULL; size_t module_data_size = 0, module_data_utf8_size = 0; - if (dir == NULL || hash == NULL || tempfd == NULL || has_macros == NULL) { + if (dir == NULL || hash == NULL || tempfd == NULL || has_macros == NULL || tempfile == NULL) { return CL_EARG; } @@ -398,12 +397,12 @@ *has_macros = *has_macros + 1; - if ((ret = cli_gentempfd_with_prefix(ctx->sub_tmpdir, "vba_project", &tempfile, tempfd)) != CL_SUCCESS) { + if ((ret = cli_gentempfd_with_prefix(ctx->sub_tmpdir, "vba_project", tempfile, tempfd)) != CL_SUCCESS) { cli_warnmsg("vba_readdir_new: VBA project cannot be dumped to file\n"); goto done; } - cli_dbgmsg("Dumping VBA project from dir %s to file %s\n", fullname, tempfile); + cli_dbgmsg("Dumping VBA project from dir %s to file %s\n", fullname, *tempfile); #define CLI_WRITEN(msg, size) \ do { \ @@ -1305,9 +1304,6 @@ if (stream_name) { free((void *)stream_name); } - if (tempfile) { - free(tempfile); - } if (ret != CL_SUCCESS && *tempfd >= 0) { close(*tempfd); *tempfd = -1; @@ -1972,7 +1968,7 @@ uint32_t len __attribute__((packed)); uint32_t state __attribute__((packed)); uint32_t offset __attribute__((packed)); - } * m; + } *m; const struct macro *n; #ifdef HAVE_PRAGMA_PACK #pragma pack() diff -Nru clamav-1.0.1+dfsg/libclamav/vba_extract.h clamav-1.0.2+dfsg/libclamav/vba_extract.h --- clamav-1.0.1+dfsg/libclamav/vba_extract.h 
2023-02-13 06:00:26.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav/vba_extract.h 2023-08-15 22:24:07.000000000 +0000 @@ -40,7 +40,7 @@ } vba_project_t; vba_project_t *cli_vba_readdir(const char *dir, struct uniq *U, uint32_t which); -cl_error_t cli_vba_readdir_new(cli_ctx *ctx, const char *dir, struct uniq *U, const char *hash, uint32_t which, int *tempfd, int *has_macros); +cl_error_t cli_vba_readdir_new(cli_ctx *ctx, const char *dir, struct uniq *U, const char *hash, uint32_t which, int *tempfd, int *has_macros, char **tempfile); vba_project_t *cli_wm_readdir(int fd); void cli_free_vba_project(vba_project_t *vba_project); diff -Nru clamav-1.0.1+dfsg/libclamav/xlm_extract.c clamav-1.0.2+dfsg/libclamav/xlm_extract.c --- clamav-1.0.1+dfsg/libclamav/xlm_extract.c 2023-02-13 06:00:26.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav/xlm_extract.c 2023-08-15 22:24:07.000000000 +0000 @@ -4994,6 +4994,9 @@ FREE(data); + if (tempfile && !ctx->engine->keeptmp) { + remove(tempfile); + } FREE(tempfile); return status; diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/.cargo-checksum.json clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/.cargo-checksum.json --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/.cargo-checksum.json 2023-02-13 06:00:43.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/.cargo-checksum.json 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ 
-{"files":{"COPYING":"01c266bced4a434da0051174d6bee16a4c82cf634e2679b6155d40d75012390f","Cargo.toml":"c9b1b15e299ba4e6ed0d6f25cde30b26b13b6068a7fbd980000c37bca19b0104","DESIGN.md":"64ff45ea2a89d4c32b29af91acb7743a861fcac417cb94fde8e6559405d603b2","LICENSE-MIT":"0f96a83840e146e43c0ec96a22ec1f392e0680e6c1226e6f3ba87e0740af850f","README.md":"5999e5768f5da8ab9b50c016766b5185b4c79936c56bef6d311ddcb0a38c4b94","UNLICENSE":"7e12e5df4bae12cb21581ba157ced20e1986a0508dd10d0e8a4ab9a4cf94e85c","rustfmt.toml":"1ca600239a27401c4a43f363cf3f38183a212affc1f31bff3ae93234bbaec228","src/ahocorasick.rs":"b92c9a65c4ee8029ff5a710aa1514caf838e73072c177dff5375463769f0b1ce","src/automaton.rs":"931af0aad03079bc4f6400d573fce832ce1edeeaf196815a16750d57b54b2183","src/buffer.rs":"dae7ee7c1f846ca9cf115ba4949484000e1837b4fb7311f8d8c9a35011c9c26f","src/byte_frequencies.rs":"2fb85b381c038c1e44ce94294531cdcd339dca48b1e61f41455666e802cbbc9e","src/classes.rs":"99a53a2ed8eea8c13699def90e31dfdff9d0b90572b1db3cb534e3396e7a0ed0","src/dfa.rs":"25e4455b3e179a7e192108d05f3683993456b36e3ebed99f827558c52525b7e6","src/error.rs":"d34c2c9c815df5d9dedc46b4b3ce109cd2cee07825de643f0c574ec960367beb","src/lib.rs":"7a47d4c87f83e0e7ddf0777a71e4858904e73477ce18404cb89e656070e86aef","src/nfa.rs":"3b817b4aa85540e8c0d35aff7ed7cfbab70ec7d2aaa779d63b4f5369bff31ce1","src/packed/api.rs":"df42e7500c94c9de1ac44145a0dd99ea02047e6bba229da12f2575337beebcf0","src/packed/mod.rs":"ad2f8e18996737347a1181a4457387276d139315bcabfc5e34494af0c0319701","src/packed/pattern.rs":"3abf3835d4c4f8a43753c52936a894d819f713f233fc046e19de5ef95200dcce","src/packed/rabinkarp.rs":"ad7d4533f96aed336e29c5553657ae57b0d733ace9c707a6cf4d08d8fc6edee5","src/packed/teddy/README.md":"b4b83fb5afafbbea6cb76fe70f49cc8ced888f682d98abe5ea5773e95d9ec2b0","src/packed/teddy/compile.rs":"aad40b3f93d2c388b409b31fb2795d414a365237789d5b1a7510d97ceb8ce260","src/packed/teddy/mod.rs":"83b52bd80272970ad17234d0db293d17c1710ec582302bf516b203c8edec037e","src/packed/teddy/runtime.rs":"8
36146e90b320b14fa2c65fe4af7915a41f6fb04408aac5fac731c22ff46adae","src/packed/tests.rs":"b8dc4d3281ecd6d0fa2bf7ef16cf292a467dfdce64e470c7921e983bfa60fee2","src/packed/vector.rs":"ab3c0535fca5f09198d58cbfae44c292aeb3ce44bc92bca36d30dc72963639fc","src/prefilter.rs":"82a3eb6d5c0c3f10bc8d5f57d55d6d14cf4cf21c475bb5253e1921084063b8d7","src/state_id.rs":"519ec8c7bf3fa72103d4c561c193759759f535dca924c9853efe630f406d2029","src/tests.rs":"ee9b85f3c27cb2fba5796e5be8019aafecc13ee9a4f614553f2bc8953f51c6de"},"package":"cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac"} \ No newline at end of file diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/COPYING clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/COPYING --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/COPYING 2023-02-13 06:00:43.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/COPYING 1970-01-01 00:00:00.000000000 +0000 @@ -1,3 +0,0 @@ -This project is dual-licensed under the Unlicense and MIT licenses. - -You may use this code under the terms of either license. diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/Cargo.toml clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/Cargo.toml --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/Cargo.toml 2023-02-13 06:00:43.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/Cargo.toml 1970-01-01 00:00:00.000000000 +0000 @@ -1,50 +0,0 @@ -# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO -# -# When uploading crates to the registry Cargo will automatically -# "normalize" Cargo.toml files for maximal compatibility -# with all versions of Cargo and also rewrite `path` dependencies -# to registry (e.g., crates.io) dependencies. -# -# If you are reading this file be aware that the original Cargo.toml -# will likely look very different (and much more reasonable). -# See Cargo.toml.orig for the original contents. 
- -[package] -edition = "2018" -name = "aho-corasick" -version = "0.7.20" -authors = ["Andrew Gallant "] -exclude = ["/aho-corasick-debug"] -autotests = false -description = "Fast multiple substring searching." -homepage = "https://github.com/BurntSushi/aho-corasick" -readme = "README.md" -keywords = [ - "string", - "search", - "text", - "aho", - "multi", -] -categories = ["text-processing"] -license = "Unlicense OR MIT" -repository = "https://github.com/BurntSushi/aho-corasick" - -[profile.bench] -debug = true - -[profile.release] -debug = true - -[lib] -name = "aho_corasick" - -[dependencies.memchr] -version = "2.4.0" -default-features = false - -[dev-dependencies] - -[features] -default = ["std"] -std = ["memchr/std"] diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/DESIGN.md clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/DESIGN.md --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/DESIGN.md 2023-02-13 06:00:43.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/DESIGN.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,483 +0,0 @@ -This document describes the internal design of this crate, which is an object -lesson in what happens when you take a fairly simple old algorithm like -Aho-Corasick and make it fast and production ready. - -The target audience of this document is Rust programmers that have some -familiarity with string searching, however, one does not need to know the -Aho-Corasick algorithm in order to read this (it is explained below). One -should, however, know what a trie is. (If you don't, go read its Wikipedia -article.) - -The center-piece of this crate is an implementation of Aho-Corasick. On its -own, Aho-Corasick isn't that complicated. The complex pieces come from the -different variants of Aho-Corasick implemented in this crate. Specifically, -they are: - -* Aho-Corasick as an NFA, using dense transitions near the root with sparse - transitions elsewhere. 
-* Aho-Corasick as a DFA. (An NFA is slower to search, but cheaper to construct - and uses less memory.) - * A DFA with pre-multiplied state identifiers. This saves a multiplication - instruction in the core search loop. - * A DFA with equivalence classes of bytes as the alphabet, instead of the - traditional 256-byte alphabet. This shrinks the size of the DFA in memory, - but adds an extra lookup in the core search loop to map the input byte to - an equivalent class. -* The option to choose how state identifiers are represented, via one of - u8, u16, u32, u64 or usize. This permits creating compact automatons when - matching a small number of patterns. -* Supporting "standard" match semantics, along with its overlapping variant, - in addition to leftmost-first and leftmost-longest semantics. The "standard" - semantics are typically what you see in a textbook description of - Aho-Corasick. However, Aho-Corasick is also useful as an optimization in - regex engines, which often use leftmost-first or leftmost-longest semantics. - Thus, it is useful to implement those semantics here. The "standard" and - "leftmost" search algorithms are subtly different, and also require slightly - different construction algorithms. -* Support for ASCII case insensitive matching. -* Support for accelerating searches when the patterns all start with a small - number of fixed bytes. Or alternatively, when the patterns all contain a - small number of rare bytes. (Searching for these bytes uses SIMD vectorized - code courtesy of `memchr`.) -* Transparent support for alternative SIMD vectorized search routines for - smaller number of literals, such as the Teddy algorithm. We called these - "packed" search routines because they use SIMD. They can often be an order of - magnitude faster than just Aho-Corasick, but don't scale as well. -* Support for searching streams. This can reuse most of the underlying code, - but does require careful buffering support. 
-* Support for anchored searches, which permit efficient `is_prefix` checks for - a large number of patterns. - -When you combine all of this together along with trying to make everything as -fast as possible, what you end up with is enitrely too much code with too much -`unsafe`. Alas, I was not smart enough to figure out how to reduce it. Instead, -we will explain it. - - -# Basics - -The fundamental problem this crate is trying to solve is to determine the -occurrences of possibly many patterns in a haystack. The naive way to solve -this is to look for a match for each pattern at each position in the haystack: - - for i in 0..haystack.len(): - for p in patterns.iter(): - if haystack[i..].starts_with(p.bytes()): - return Match(p.id(), i, i + p.bytes().len()) - -Those four lines are effectively all this crate does. The problem with those -four lines is that they are very slow, especially when you're searching for a -large number of patterns. - -While there are many different algorithms available to solve this, a popular -one is Aho-Corasick. It's a common solution because it's not too hard to -implement, scales quite well even when searching for thousands of patterns and -is generally pretty fast. Aho-Corasick does well here because, regardless of -the number of patterns you're searching for, it always visits each byte in the -haystack exactly once. This means, generally speaking, adding more patterns to -an Aho-Corasick automaton does not make it slower. (Strictly speaking, however, -this is not true, since a larger automaton will make less effective use of the -CPU's cache.) - -Aho-Corasick can be succinctly described as a trie with state transitions -between some of the nodes that efficiently instruct the search algorithm to -try matching alternative keys in the automaton. The trick is that these state -transitions are arranged such that each byte of input needs to be inspected -only once. 
These state transitions are typically called "failure transitions," -because they instruct the searcher (the thing traversing the automaton while -reading from the haystack) what to do when a byte in the haystack does not -correspond to a valid transition in the current state of the trie. - -More formally, a failure transition points to a state in the automaton that may -lead to a match whose prefix is a proper suffix of the path traversed through -the trie so far. (If no such proper suffix exists, then the failure transition -points back to the start state of the trie, effectively restarting the search.) -This is perhaps simpler to explain pictorally. For example, let's say we built -an Aho-Corasick automaton with the following patterns: 'abcd' and 'cef'. The -trie looks like this: - - a - S1 - b - S2 - c - S3 - d - S4* - / - S0 - c - S5 - e - S6 - f - S7* - -where states marked with a `*` are match states (meaning, the search algorithm -should stop and report a match to the caller). - -So given this trie, it should be somewhat straight-forward to see how it can -be used to determine whether any particular haystack *starts* with either -`abcd` or `cef`. It's easy to express this in code: - - fn has_prefix(trie: &Trie, haystack: &[u8]) -> bool { - let mut state_id = trie.start(); - // If the empty pattern is in trie, then state_id is a match state. - if trie.is_match(state_id) { - return true; - } - for (i, &b) in haystack.iter().enumerate() { - state_id = match trie.next_state(state_id, b) { - Some(id) => id, - // If there was no transition for this state and byte, then we know - // the haystack does not start with one of the patterns in our trie. - None => return false, - }; - if trie.is_match(state_id) { - return true; - } - } - false - } - -And that's pretty much it. All we do is move through the trie starting with the -bytes at the beginning of the haystack. 
If we find ourselves in a position -where we can't move, or if we've looked through the entire haystack without -seeing a match state, then we know the haystack does not start with any of the -patterns in the trie. - -The meat of the Aho-Corasick algorithm is in how we add failure transitions to -our trie to keep searching efficient. Specifically, it permits us to not only -check whether a haystack *starts* with any one of a number of patterns, but -rather, whether the haystack contains any of a number of patterns *anywhere* in -the haystack. - -As mentioned before, failure transitions connect a proper suffix of the path -traversed through the trie before, with a path that leads to a match that has a -prefix corresponding to that proper suffix. So in our case, for patterns `abcd` -and `cef`, with a haystack `abcef`, we want to transition to state `S5` (from -the diagram above) from `S3` upon seeing that the byte following `c` is not -`d`. Namely, the proper suffix in this example is `c`, which is a prefix of -`cef`. So the modified diagram looks like this: - - - a - S1 - b - S2 - c - S3 - d - S4* - / / - / ---------------- - / / - S0 - c - S5 - e - S6 - f - S7* - -One thing that isn't shown in this diagram is that *all* states have a failure -transition, but only `S3` has a *non-trivial* failure transition. That is, all -other states have a failure transition back to the start state. So if our -haystack was `abzabcd`, then the searcher would transition back to `S0` after -seeing `z`, which effectively restarts the search. (Because there is no pattern -in our trie that has a prefix of `bz` or `z`.) - -The code for traversing this *automaton* or *finite state machine* (it is no -longer just a trie) is not that much different from the `has_prefix` code -above: - - fn contains(fsm: &FiniteStateMachine, haystack: &[u8]) -> bool { - let mut state_id = fsm.start(); - // If the empty pattern is in fsm, then state_id is a match state. 
- if fsm.is_match(state_id) { - return true; - } - for (i, &b) in haystack.iter().enumerate() { - // While the diagram above doesn't show this, we may wind up needing - // to follow multiple failure transitions before we land on a state - // in which we can advance. Therefore, when searching for the next - // state, we need to loop until we don't see a failure transition. - // - // This loop terminates because the start state has no empty - // transitions. Every transition from the start state either points to - // another state, or loops back to the start state. - loop { - match fsm.next_state(state_id, b) { - Some(id) => { - state_id = id; - break; - } - // Unlike our code above, if there was no transition for this - // state, then we don't quit. Instead, we look for this state's - // failure transition and follow that instead. - None => { - state_id = fsm.next_fail_state(state_id); - } - }; - } - if fsm.is_match(state_id) { - return true; - } - } - false - } - -Other than the complication around traversing failure transitions, this code -is still roughly "traverse the automaton with bytes from the haystack, and quit -when a match is seen." - -And that concludes our section on the basics. While we didn't go deep into -how the automaton is built (see `src/nfa.rs`, which has detailed comments about -that), the basic structure of Aho-Corasick should be reasonably clear. - - -# NFAs and DFAs - -There are generally two types of finite automata: non-deterministic finite -automata (NFA) and deterministic finite automata (DFA). The difference between -them is, principally, that an NFA can be in multiple states at once. This is -typically accomplished by things called _epsilon_ transitions, where one could -move to a new state without consuming any bytes from the input. (The other -mechanism by which NFAs can be in more than one state is where the same byte in -a particular state transitions to multiple distinct states.) 
In contrast, a DFA -can only ever be in one state at a time. A DFA has no epsilon transitions, and -for any given state, a byte transitions to at most one other state. - -By this formulation, the Aho-Corasick automaton described in the previous -section is an NFA. This is because failure transitions are, effectively, -epsilon transitions. That is, whenever the automaton is in state `S`, it is -actually in the set of states that are reachable by recursively following -failure transitions from `S`. (This means that, for example, the start state -is always active since the start state is reachable via failure transitions -from any state in the automaton.) - -NFAs have a lot of nice properties. They tend to be easier to construct, and -also tend to use less memory. However, their primary downside is that they are -typically slower to execute. For example, the code above showing how to search -with an Aho-Corasick automaton needs to potentially iterate through many -failure transitions for every byte of input. While this is a fairly small -amount of overhead, this can add up, especially if the automaton has a lot of -overlapping patterns with a lot of failure transitions. - -A DFA's search code, by contrast, looks like this: - - fn contains(dfa: &DFA, haystack: &[u8]) -> bool { - let mut state_id = dfa.start(); - // If the empty pattern is in dfa, then state_id is a match state. - if dfa.is_match(state_id) { - return true; - } - for (i, &b) in haystack.iter().enumerate() { - // An Aho-Corasick DFA *never* has a missing state that requires - // failure transitions to be followed. One byte of input advances the - // automaton by one state. Always. - state_id = dfa.next_state(state_id, b); - if dfa.is_match(state_id) { - return true; - } - } - false - } - -The search logic here is much simpler than for the NFA, and this tends to -translate into significant performance benefits as well, since there's a lot -less work being done for each byte in the haystack. 
How is this accomplished? -It's done by pre-following all failure transitions for all states for all bytes -in the alphabet, and then building a single state transition table. Building -this DFA can be much more costly than building the NFA, and use much more -memory, but the better performance can be worth it. - -Users of this crate can actually choose between using an NFA or a DFA. By -default, an NFA is used, because it typically strikes the best balance between -space usage and search performance. But the DFA option is available for cases -where a little extra memory and upfront time building the automaton is okay. -For example, the `AhoCorasick::auto_configure` and -`AhoCorasickBuilder::auto_configure` methods will enable the DFA setting if -there are a small number of patterns. - - -# More DFA tricks - -As described in the previous section, one of the downsides of using a DFA -is that it uses more memory and can take longer to build. One small way of -mitigating these concerns is to map the alphabet used by the automaton into -a smaller space. Typically, the alphabet of a DFA has 256 elements in it: -one element for each possible value that fits into a byte. However, in many -cases, one does not need the full alphabet. For example, if all patterns in an -Aho-Corasick automaton are ASCII letters, then this only uses up 52 distinct -bytes. As far as the automaton is concerned, the rest of the 204 bytes are -indistinguishable from one another: they will never disrciminate between a -match or a non-match. Therefore, in cases like that, the alphabet can be shrunk -to just 53 elements. One for each ASCII letter, and then another to serve as a -placeholder for every other unused byte. - -In practice, this library doesn't quite compute the optimal set of equivalence -classes, but it's close enough in most cases. The key idea is that this then -allows the transition table for the DFA to be potentially much smaller. 
The -downside of doing this, however, is that since the transition table is defined -in terms of this smaller alphabet space, every byte in the haystack must be -re-mapped to this smaller space. This requires an additional 256-byte table. -In practice, this can lead to a small search time hit, but it can be difficult -to measure. Moreover, it can sometimes lead to faster search times for bigger -automata, since it could be the difference between more parts of the automaton -staying in the CPU cache or not. - -One other trick for DFAs employed by this crate is the notion of premultiplying -state identifiers. Specifically, the normal way to compute the next transition -in a DFA is via the following (assuming that the transition table is laid out -sequentially in memory, in row-major order, where the rows are states): - - next_state_id = dfa.transitions[current_state_id * 256 + current_byte] - -However, since the value `256` is a fixed constant, we can actually premultiply -the state identifiers in the table when we build the table initially. Then, the -next transition computation simply becomes: - - next_state_id = dfa.transitions[current_state_id + current_byte] - -This doesn't seem like much, but when this is being executed for every byte of -input that you're searching, saving that extra multiplication instruction can -add up. - -The same optimization works even when equivalence classes are enabled, as -described above. The only difference is that the premultiplication is by the -total number of equivalence classes instead of 256. - -There isn't much downside to premultiplying state identifiers, other than the -fact that you may need to choose a bigger integer representation than you would -otherwise. For example, if you don't premultiply state identifiers, then an -automaton that uses `u8` as a state identifier can hold up to 256 states. -However, if they are premultiplied, then it can only hold up to -`floor(256 / len(alphabet))` states.
Thus premultiplication impacts how compact -your DFA can be. In practice, it's pretty rare to use `u8` as a state -identifier, so premultiplication is usually a good thing to do. - -Both equivalence classes and premultiplication are tuneable parameters via the -`AhoCorasickBuilder` type, and both are enabled by default. - - -# Match semantics - -One of the more interesting things about this implementation of Aho-Corasick -that (as far as this author knows) separates it from other implementations, is -that it natively supports leftmost-first and leftmost-longest match semantics. -Briefly, match semantics refer to the decision procedure by which searching -will disambiguate matches when there are multiple to choose from: - -* **standard** match semantics emits matches as soon as they are detected by - the automaton. This is typically equivalent to the textbook non-overlapping - formulation of Aho-Corasick. -* **leftmost-first** match semantics means that 1) the next match is the match - starting at the leftmost position and 2) among multiple matches starting at - the same leftmost position, the match corresponding to the pattern provided - first by the caller is reported. -* **leftmost-longest** is like leftmost-first, except when there are multiple - matches starting at the same leftmost position, the pattern corresponding to - the longest match is returned. - -(The crate API documentation discusses these differences, with examples, in -more depth on the `MatchKind` type.) - -The reason why supporting these match semantics is important is because it -gives the user more control over the match procedure. For example, -leftmost-first permits users to implement match priority by simply putting the -higher priority patterns first. Leftmost-longest, on the other hand, permits -finding the longest possible match, which might be useful when trying to find -words matching a dictionary. 
Additionally, regex engines often want to use -Aho-Corasick as an optimization when searching for an alternation of literals. -In order to preserve correct match semantics, regex engines typically can't use -the standard textbook definition directly, since regex engines will implement -either leftmost-first (Perl-like) or leftmost-longest (POSIX) match semantics. - -Supporting leftmost semantics requires a couple key changes: - -* Constructing the Aho-Corasick automaton changes a bit in both how the trie is - constructed and how failure transitions are found. Namely, only a subset of - the failure transitions are added. Specifically, only the failure transitions - that either do not occur after a match or do occur after a match but preserve - that match are kept. (More details on this can be found in `src/nfa.rs`.) -* The search algorithm changes slightly. Since we are looking for the leftmost - match, we cannot quit as soon as a match is detected. Instead, after a match - is detected, we must keep searching until either the end of the input or - until a dead state is seen. (Dead states are not used for standard match - semantics. Dead states mean that searching should stop after a match has been - found.) - -Other implementations of Aho-Corasick do support leftmost match semantics, but -they do it with more overhead at search time, or even worse, with a queue of -matches and sophisticated hijinks to disambiguate the matches. While our -construction algorithm becomes a bit more complicated, the correct match -semantics fall out from the structure of the automaton itself. - - -# Overlapping matches - -One of the nice properties of an Aho-Corasick automaton is that it can report -all possible matches, even when they overlap with one another. In this mode, -the match semantics don't matter, since all possible matches are reported. 
-Overlapping searches work just like regular searches, except the state -identifier at which the previous search left off is carried over to the next -search, so that it can pick up where it left off. If there are additional -matches at that state, then they are reported before resuming the search. - -Enabling leftmost-first or leftmost-longest match semantics causes the -automaton to use a subset of all failure transitions, which means that -overlapping searches cannot be used. Therefore, if leftmost match semantics are -used, attempting to do an overlapping search will panic. Thus, to get -overlapping searches, the caller must use the default standard match semantics. -This behavior was chosen because there are only two alternatives, which were -deemed worse: - -* Compile two automatons internally, one for standard semantics and one for - the semantics requested by the caller (if not standard). -* Create a new type, distinct from the `AhoCorasick` type, which has different - capabilities based on the configuration options. - -The first is untenable because of the amount of memory used by the automaton. -The second increases the complexity of the API too much by adding too many -types that do similar things. It is conceptually much simpler to keep all -searching isolated to a single type. Callers may query whether the automaton -supports overlapping searches via the `AhoCorasick::supports_overlapping` -method. - - -# Stream searching - -Since Aho-Corasick is an automaton, it is possible to do partial searches on -partial parts of the haystack, and then resume that search on subsequent pieces -of the haystack. This is useful when the haystack you're trying to search is -not stored contiguously in memory, or if one does not want to read the entire -haystack into memory at once. - -Currently, only standard semantics are supported for stream searching. This is -some of the more complicated code in this crate, and is something I would very -much like to improve. 
In particular, it currently has the restriction that it -must buffer at least enough of the haystack in memory in order to fit the -longest possible match. The difficulty in getting stream searching right is -that the implementation choices (such as the buffer size) often impact what the -API looks like and what it's allowed to do. - - -# Prefilters - -In some cases, Aho-Corasick is not the fastest way to find matches containing -multiple patterns. Sometimes, the search can be accelerated using highly -optimized SIMD routines. For example, consider searching the following -patterns: - - Sherlock - Moriarty - Watson - -It is plausible that it would be much faster to quickly look for occurrences of -the leading bytes, `S`, `M` or `W`, before trying to start searching via the -automaton. Indeed, this is exactly what this crate will do. - -When there are more than three distinct starting bytes, then this crate will -look for three distinct bytes occurring at any position in the patterns, while -preferring bytes that are heuristically determined to be rare over others. For -example: - - Abuzz - Sanchez - Vasquez - Topaz - Waltz - -Here, we have more than 3 distinct starting bytes, but all of the patterns -contain `z`, which is typically a rare byte. In this case, the prefilter will -scan for `z`, back up a bit, and then execute the Aho-Corasick automaton. - -If all of that fails, then a packed multiple substring algorithm will be -attempted. Currently, the only algorithm available for this is Teddy, but more -may be added in the future. Teddy is unlike the above prefilters in that it -confirms its own matches, so when Teddy is active, it might not be necessary -for Aho-Corasick to run at all. (See `Automaton::leftmost_find_at_no_state_imp` -in `src/automaton.rs`.) However, the current Teddy implementation only works -in `x86_64` and when SSSE3 or AVX2 are available, and moreover, only works -_well_ when there are a small number of patterns (say, less than 100). 
Teddy -also requires the haystack to be of a certain length (more than 16-34 bytes). -When the haystack is shorter than that, Rabin-Karp is used instead. (See -`src/packed/rabinkarp.rs`.) - -There is a more thorough description of Teddy at -[`src/packed/teddy/README.md`](src/packed/teddy/README.md). diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/LICENSE-MIT clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/LICENSE-MIT --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/LICENSE-MIT 2023-02-13 06:00:43.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/LICENSE-MIT 1970-01-01 00:00:00.000000000 +0000 @@ -1,21 +0,0 @@ -The MIT License (MIT) - -Copyright (c) 2015 Andrew Gallant - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/README.md clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/README.md --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/README.md 2023-02-13 06:00:43.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/README.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,187 +0,0 @@ -aho-corasick -============ -A library for finding occurrences of many patterns at once with SIMD -acceleration in some cases. This library provides multiple pattern -search principally through an implementation of the -[Aho-Corasick algorithm](https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm), -which builds a finite state machine for executing searches in linear time. -Features include case insensitive matching, overlapping matches, fast searching -via SIMD and optional full DFA construction and search & replace in streams. - -[![Build status](https://github.com/BurntSushi/aho-corasick/workflows/ci/badge.svg)](https://github.com/BurntSushi/aho-corasick/actions) -[![crates.io](https://img.shields.io/crates/v/aho-corasick.svg)](https://crates.io/crates/aho-corasick) - -Dual-licensed under MIT or the [UNLICENSE](https://unlicense.org/). - - -### Documentation - -https://docs.rs/aho-corasick - - -### Usage - -Add this to your `Cargo.toml`: - -```toml -[dependencies] -aho-corasick = "0.7" -``` - - -### Example: basic searching - -This example shows how to search for occurrences of multiple patterns -simultaneously. Each match includes the pattern that matched along with the -byte offsets of the match. 
- -```rust -use aho_corasick::AhoCorasick; - -let patterns = &["apple", "maple", "Snapple"]; -let haystack = "Nobody likes maple in their apple flavored Snapple."; - -let ac = AhoCorasick::new(patterns); -let mut matches = vec![]; -for mat in ac.find_iter(haystack) { - matches.push((mat.pattern(), mat.start(), mat.end())); -} -assert_eq!(matches, vec![ - (1, 13, 18), - (0, 28, 33), - (2, 43, 50), -]); -``` - - -### Example: case insensitivity - -This is like the previous example, but matches `Snapple` case insensitively -using `AhoCorasickBuilder`: - -```rust -use aho_corasick::AhoCorasickBuilder; - -let patterns = &["apple", "maple", "snapple"]; -let haystack = "Nobody likes maple in their apple flavored Snapple."; - -let ac = AhoCorasickBuilder::new() - .ascii_case_insensitive(true) - .build(patterns); -let mut matches = vec![]; -for mat in ac.find_iter(haystack) { - matches.push((mat.pattern(), mat.start(), mat.end())); -} -assert_eq!(matches, vec![ - (1, 13, 18), - (0, 28, 33), - (2, 43, 50), -]); -``` - - -### Example: replacing matches in a stream - -This example shows how to execute a search and replace on a stream without -loading the entire stream into memory first. - -```rust -use aho_corasick::AhoCorasick; - -let patterns = &["fox", "brown", "quick"]; -let replace_with = &["sloth", "grey", "slow"]; - -// In a real example, these might be `std::fs::File`s instead. All you need to -// do is supply a pair of `std::io::Read` and `std::io::Write` implementations. -let rdr = "The quick brown fox."; -let mut wtr = vec![]; - -let ac = AhoCorasick::new(patterns); -ac.stream_replace_all(rdr.as_bytes(), &mut wtr, replace_with) - .expect("stream_replace_all failed"); -assert_eq!(b"The slow grey sloth.".to_vec(), wtr); -``` - - -### Example: finding the leftmost first match - -In the textbook description of Aho-Corasick, its formulation is typically -structured such that it reports all possible matches, even when they overlap -with another. 
In many cases, overlapping matches may not be desired, such as -the case of finding all successive non-overlapping matches like you might with -a standard regular expression. - -Unfortunately the "obvious" way to modify the Aho-Corasick algorithm to do -this doesn't always work in the expected way, since it will report matches as -soon as they are seen. For example, consider matching the regex `Samwise|Sam` -against the text `Samwise`. Most regex engines (that are Perl-like, or -non-POSIX) will report `Samwise` as a match, but the standard Aho-Corasick -algorithm modified for reporting non-overlapping matches will report `Sam`. - -A novel contribution of this library is the ability to change the match -semantics of Aho-Corasick (without additional search time overhead) such that -`Samwise` is reported instead. For example, here's the standard approach: - -```rust -use aho_corasick::AhoCorasick; - -let patterns = &["Samwise", "Sam"]; -let haystack = "Samwise"; - -let ac = AhoCorasick::new(patterns); -let mat = ac.find(haystack).expect("should have a match"); -assert_eq!("Sam", &haystack[mat.start()..mat.end()]); -``` - -And now here's the leftmost-first version, which matches how a Perl-like -regex will work: - -```rust -use aho_corasick::{AhoCorasickBuilder, MatchKind}; - -let patterns = &["Samwise", "Sam"]; -let haystack = "Samwise"; - -let ac = AhoCorasickBuilder::new() - .match_kind(MatchKind::LeftmostFirst) - .build(patterns); -let mat = ac.find(haystack).expect("should have a match"); -assert_eq!("Samwise", &haystack[mat.start()..mat.end()]); -``` - -In addition to leftmost-first semantics, this library also supports -leftmost-longest semantics, which match the POSIX behavior of a regular -expression alternation. See `MatchKind` in the docs for more details. - - -### Minimum Rust version policy - -This crate's minimum supported `rustc` version is `1.41.1`. 
- -The current policy is that the minimum Rust version required to use this crate -can be increased in minor version updates. For example, if `crate 1.0` requires -Rust 1.20.0, then `crate 1.0.z` for all values of `z` will also require Rust -1.20.0 or newer. However, `crate 1.y` for `y > 0` may require a newer minimum -version of Rust. - -In general, this crate will be conservative with respect to the minimum -supported version of Rust. - - -### FFI bindings - -* [G-Research/ahocorasick_rs](https://github.com/G-Research/ahocorasick_rs/) -is a Python wrapper for this library. - - -### Future work - -Here are some plans for the future: - -* Assuming the current API is sufficient, I'd like to commit to it and release - a `1.0` version of this crate some time in the next 6-12 months. -* Support stream searching with leftmost match semantics. Currently, only - standard match semantics are supported. Getting this right seems possible, - but is tricky since the match state needs to be propagated through multiple - searches. (With standard semantics, as soon as a match is seen the search - ends.) diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/UNLICENSE clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/UNLICENSE --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/UNLICENSE 2023-02-13 06:00:43.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/UNLICENSE 1970-01-01 00:00:00.000000000 +0000 @@ -1,24 +0,0 @@ -This is free and unencumbered software released into the public domain. - -Anyone is free to copy, modify, publish, use, compile, sell, or -distribute this software, either in source code form or as a compiled -binary, for any purpose, commercial or non-commercial, and by any -means. - -In jurisdictions that recognize copyright laws, the author or authors -of this software dedicate any and all copyright interest in the -software to the public domain. 
We make this dedication for the benefit -of the public at large and to the detriment of our heirs and -successors. We intend this dedication to be an overt act of -relinquishment in perpetuity of all present and future rights to this -software under copyright law. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -OTHER DEALINGS IN THE SOFTWARE. - -For more information, please refer to diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/rustfmt.toml clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/rustfmt.toml --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/rustfmt.toml 2023-02-13 06:00:43.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/rustfmt.toml 1970-01-01 00:00:00.000000000 +0000 @@ -1,2 +0,0 @@ -max_width = 79 -use_small_heuristics = "max" diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/ahocorasick.rs clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/ahocorasick.rs --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/ahocorasick.rs 2023-02-13 06:00:43.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/ahocorasick.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,2141 +0,0 @@ -use std::io; - -use crate::automaton::Automaton; -use crate::buffer::Buffer; -use crate::dfa::{self, DFA}; -use crate::error::Result; -use crate::nfa::{self, NFA}; -use crate::packed; -use crate::prefilter::{Prefilter, PrefilterState}; -use crate::state_id::StateID; -use crate::Match; - -/// An automaton for searching multiple strings in linear time. 
-/// -/// The `AhoCorasick` type supports a few basic ways of constructing an -/// automaton, including -/// [`AhoCorasick::new`](struct.AhoCorasick.html#method.new) -/// and -/// [`AhoCorasick::new_auto_configured`](struct.AhoCorasick.html#method.new_auto_configured). -/// However, there are a fair number of configurable options that can be set -/// by using -/// [`AhoCorasickBuilder`](struct.AhoCorasickBuilder.html) -/// instead. Such options include, but are not limited to, how matches are -/// determined, simple case insensitivity, whether to use a DFA or not and -/// various knobs for controlling the space-vs-time trade offs taken when -/// building the automaton. -/// -/// If you aren't sure where to start, try beginning with -/// [`AhoCorasick::new_auto_configured`](struct.AhoCorasick.html#method.new_auto_configured). -/// -/// # Resource usage -/// -/// Aho-Corasick automatons are always constructed in `O(p)` time, where `p` -/// is the combined length of all patterns being searched. With that said, -/// building an automaton can be fairly costly because of high constant -/// factors, particularly when enabling the -/// [DFA](struct.AhoCorasickBuilder.html#method.dfa) -/// option (which is disabled by default). For this reason, it's generally a -/// good idea to build an automaton once and reuse it as much as possible. -/// -/// Aho-Corasick automatons can also use a fair bit of memory. To get a -/// concrete idea of how much memory is being used, try using the -/// [`AhoCorasick::heap_bytes`](struct.AhoCorasick.html#method.heap_bytes) -/// method. -/// -/// # Examples -/// -/// This example shows how to search for occurrences of multiple patterns -/// simultaneously in a case insensitive fashion. Each match includes the -/// pattern that matched along with the byte offsets of the match. 
-/// -/// ``` -/// use aho_corasick::AhoCorasickBuilder; -/// -/// let patterns = &["apple", "maple", "snapple"]; -/// let haystack = "Nobody likes maple in their apple flavored Snapple."; -/// -/// let ac = AhoCorasickBuilder::new() -/// .ascii_case_insensitive(true) -/// .build(patterns); -/// let mut matches = vec![]; -/// for mat in ac.find_iter(haystack) { -/// matches.push((mat.pattern(), mat.start(), mat.end())); -/// } -/// assert_eq!(matches, vec![ -/// (1, 13, 18), -/// (0, 28, 33), -/// (2, 43, 50), -/// ]); -/// ``` -/// -/// This example shows how to replace matches with some other string: -/// -/// ``` -/// use aho_corasick::AhoCorasick; -/// -/// let patterns = &["fox", "brown", "quick"]; -/// let haystack = "The quick brown fox."; -/// let replace_with = &["sloth", "grey", "slow"]; -/// -/// let ac = AhoCorasick::new(patterns); -/// let result = ac.replace_all(haystack, replace_with); -/// assert_eq!(result, "The slow grey sloth."); -/// ``` -#[derive(Clone, Debug)] -pub struct AhoCorasick { - imp: Imp, - match_kind: MatchKind, -} - -impl AhoCorasick { - /// Create a new Aho-Corasick automaton using the default configuration. - /// - /// The default configuration optimizes for less space usage, but at the - /// expense of longer search times. To change the configuration, use - /// [`AhoCorasickBuilder`](struct.AhoCorasickBuilder.html) - /// for fine-grained control, or - /// [`AhoCorasick::new_auto_configured`](struct.AhoCorasick.html#method.new_auto_configured) - /// for automatic configuration if you aren't sure which settings to pick. - /// - /// This uses the default - /// [`MatchKind::Standard`](enum.MatchKind.html#variant.Standard) - /// match semantics, which reports a match as soon as it is found. This - /// corresponds to the standard match semantics supported by textbook - /// descriptions of the Aho-Corasick algorithm. 
- /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::AhoCorasick; - /// - /// let ac = AhoCorasick::new(&[ - /// "foo", "bar", "baz", - /// ]); - /// assert_eq!(Some(1), ac.find("xxx bar xxx").map(|m| m.pattern())); - /// ``` - pub fn new(patterns: I) -> AhoCorasick - where - I: IntoIterator, - P: AsRef<[u8]>, - { - AhoCorasickBuilder::new().build(patterns) - } - - /// Build an Aho-Corasick automaton with an automatically determined - /// configuration. - /// - /// Specifically, this requires a slice of patterns instead of an iterator - /// since the configuration is determined by looking at the patterns before - /// constructing the automaton. The idea here is to balance space and time - /// automatically. That is, when searching a small number of patterns, this - /// will attempt to use the fastest possible configuration since the total - /// space required will be small anyway. As the number of patterns grows, - /// this will fall back to slower configurations that use less space. - /// - /// If you want auto configuration but with match semantics different from - /// the default `MatchKind::Standard`, then use - /// [`AhoCorasickBuilder::auto_configure`](struct.AhoCorasickBuilder.html#method.auto_configure). - /// - /// # Examples - /// - /// Basic usage is just like `new`, except you must provide a slice: - /// - /// ``` - /// use aho_corasick::AhoCorasick; - /// - /// let ac = AhoCorasick::new_auto_configured(&[ - /// "foo", "bar", "baz", - /// ]); - /// assert_eq!(Some(1), ac.find("xxx bar xxx").map(|m| m.pattern())); - /// ``` - pub fn new_auto_configured(patterns: &[B]) -> AhoCorasick - where - B: AsRef<[u8]>, - { - AhoCorasickBuilder::new().auto_configure(patterns).build(patterns) - } -} - -impl AhoCorasick { - /// Returns true if and only if this automaton matches the haystack at any - /// position. - /// - /// `haystack` may be any type that is cheaply convertible to a `&[u8]`. 
- /// This includes, but is not limited to, `String`, `&str`, `Vec`, and - /// `&[u8]` itself. - /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::AhoCorasick; - /// - /// let ac = AhoCorasick::new(&[ - /// "foo", "bar", "quux", "baz", - /// ]); - /// assert!(ac.is_match("xxx bar xxx")); - /// assert!(!ac.is_match("xxx qux xxx")); - /// ``` - pub fn is_match>(&self, haystack: B) -> bool { - self.earliest_find(haystack).is_some() - } - - /// Returns the location of the first detected match in `haystack`. - /// - /// This method has the same behavior regardless of the - /// [`MatchKind`](enum.MatchKind.html) - /// of this automaton. - /// - /// `haystack` may be any type that is cheaply convertible to a `&[u8]`. - /// This includes, but is not limited to, `String`, `&str`, `Vec`, and - /// `&[u8]` itself. - /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::AhoCorasick; - /// - /// let ac = AhoCorasick::new(&[ - /// "abc", "b", - /// ]); - /// let mat = ac.earliest_find("abcd").expect("should have match"); - /// assert_eq!(1, mat.pattern()); - /// assert_eq!((1, 2), (mat.start(), mat.end())); - /// ``` - pub fn earliest_find>(&self, haystack: B) -> Option { - let mut prestate = PrefilterState::new(self.max_pattern_len()); - let mut start = self.imp.start_state(); - self.imp.earliest_find_at( - &mut prestate, - haystack.as_ref(), - 0, - &mut start, - ) - } - - /// Returns the location of the first match according to the match - /// semantics that this automaton was constructed with. - /// - /// When using `MatchKind::Standard`, this corresponds precisely to the - /// same behavior as - /// [`earliest_find`](struct.AhoCorasick.html#method.earliest_find). - /// Otherwise, match semantics correspond to either - /// [leftmost-first](enum.MatchKind.html#variant.LeftmostFirst) - /// or - /// [leftmost-longest](enum.MatchKind.html#variant.LeftmostLongest). 
- /// - /// `haystack` may be any type that is cheaply convertible to a `&[u8]`. - /// This includes, but is not limited to, `String`, `&str`, `Vec`, and - /// `&[u8]` itself. - /// - /// # Examples - /// - /// Basic usage, with standard semantics: - /// - /// ``` - /// use aho_corasick::{AhoCorasickBuilder, MatchKind}; - /// - /// let patterns = &["b", "abc", "abcd"]; - /// let haystack = "abcd"; - /// - /// let ac = AhoCorasickBuilder::new() - /// .match_kind(MatchKind::Standard) // default, not necessary - /// .build(patterns); - /// let mat = ac.find(haystack).expect("should have a match"); - /// assert_eq!("b", &haystack[mat.start()..mat.end()]); - /// ``` - /// - /// Now with leftmost-first semantics: - /// - /// ``` - /// use aho_corasick::{AhoCorasickBuilder, MatchKind}; - /// - /// let patterns = &["b", "abc", "abcd"]; - /// let haystack = "abcd"; - /// - /// let ac = AhoCorasickBuilder::new() - /// .match_kind(MatchKind::LeftmostFirst) - /// .build(patterns); - /// let mat = ac.find(haystack).expect("should have a match"); - /// assert_eq!("abc", &haystack[mat.start()..mat.end()]); - /// ``` - /// - /// And finally, leftmost-longest semantics: - /// - /// ``` - /// use aho_corasick::{AhoCorasickBuilder, MatchKind}; - /// - /// let patterns = &["b", "abc", "abcd"]; - /// let haystack = "abcd"; - /// - /// let ac = AhoCorasickBuilder::new() - /// .match_kind(MatchKind::LeftmostLongest) - /// .build(patterns); - /// let mat = ac.find(haystack).expect("should have a match"); - /// assert_eq!("abcd", &haystack[mat.start()..mat.end()]); - /// ``` - pub fn find>(&self, haystack: B) -> Option { - let mut prestate = PrefilterState::new(self.max_pattern_len()); - self.imp.find_at_no_state(&mut prestate, haystack.as_ref(), 0) - } - - /// Returns an iterator of non-overlapping matches, using the match - /// semantics that this automaton was constructed with. - /// - /// `haystack` may be any type that is cheaply convertible to a `&[u8]`. 
- /// This includes, but is not limited to, `String`, `&str`, `Vec`, and - /// `&[u8]` itself. - /// - /// # Examples - /// - /// Basic usage, with standard semantics: - /// - /// ``` - /// use aho_corasick::{AhoCorasickBuilder, MatchKind}; - /// - /// let patterns = &["append", "appendage", "app"]; - /// let haystack = "append the app to the appendage"; - /// - /// let ac = AhoCorasickBuilder::new() - /// .match_kind(MatchKind::Standard) // default, not necessary - /// .build(patterns); - /// let matches: Vec = ac - /// .find_iter(haystack) - /// .map(|mat| mat.pattern()) - /// .collect(); - /// assert_eq!(vec![2, 2, 2], matches); - /// ``` - /// - /// Now with leftmost-first semantics: - /// - /// ``` - /// use aho_corasick::{AhoCorasickBuilder, MatchKind}; - /// - /// let patterns = &["append", "appendage", "app"]; - /// let haystack = "append the app to the appendage"; - /// - /// let ac = AhoCorasickBuilder::new() - /// .match_kind(MatchKind::LeftmostFirst) - /// .build(patterns); - /// let matches: Vec = ac - /// .find_iter(haystack) - /// .map(|mat| mat.pattern()) - /// .collect(); - /// assert_eq!(vec![0, 2, 0], matches); - /// ``` - /// - /// And finally, leftmost-longest semantics: - /// - /// ``` - /// use aho_corasick::{AhoCorasickBuilder, MatchKind}; - /// - /// let patterns = &["append", "appendage", "app"]; - /// let haystack = "append the app to the appendage"; - /// - /// let ac = AhoCorasickBuilder::new() - /// .match_kind(MatchKind::LeftmostLongest) - /// .build(patterns); - /// let matches: Vec = ac - /// .find_iter(haystack) - /// .map(|mat| mat.pattern()) - /// .collect(); - /// assert_eq!(vec![0, 2, 1], matches); - /// ``` - pub fn find_iter<'a, 'b, B: ?Sized + AsRef<[u8]>>( - &'a self, - haystack: &'b B, - ) -> FindIter<'a, 'b, S> { - FindIter::new(self, haystack.as_ref()) - } - - /// Returns an iterator of overlapping matches in the given `haystack`. 
- /// - /// Overlapping matches can _only_ be detected using - /// `MatchKind::Standard` semantics. If this automaton was constructed with - /// leftmost semantics, then this method will panic. To determine whether - /// this will panic at runtime, use the - /// [`AhoCorasick::supports_overlapping`](struct.AhoCorasick.html#method.supports_overlapping) - /// method. - /// - /// `haystack` may be any type that is cheaply convertible to a `&[u8]`. - /// This includes, but is not limited to, `String`, `&str`, `Vec`, and - /// `&[u8]` itself. - /// - /// # Panics - /// - /// This panics when `AhoCorasick::supports_overlapping` returns `false`. - /// That is, this panics when this automaton's match semantics are not - /// `MatchKind::Standard`. - /// - /// # Examples - /// - /// Basic usage, with standard semantics: - /// - /// ``` - /// use aho_corasick::AhoCorasick; - /// - /// let patterns = &["append", "appendage", "app"]; - /// let haystack = "append the app to the appendage"; - /// - /// let ac = AhoCorasick::new(patterns); - /// let matches: Vec = ac - /// .find_overlapping_iter(haystack) - /// .map(|mat| mat.pattern()) - /// .collect(); - /// assert_eq!(vec![2, 0, 2, 2, 0, 1], matches); - /// ``` - pub fn find_overlapping_iter<'a, 'b, B: ?Sized + AsRef<[u8]>>( - &'a self, - haystack: &'b B, - ) -> FindOverlappingIter<'a, 'b, S> { - FindOverlappingIter::new(self, haystack.as_ref()) - } - - /// Replace all matches with a corresponding value in the `replace_with` - /// slice given. Matches correspond to the same matches as reported by - /// [`find_iter`](struct.AhoCorasick.html#method.find_iter). - /// - /// Replacements are determined by the index of the matching pattern. - /// For example, if the pattern with index `2` is found, then it is - /// replaced by `replace_with[2]`. - /// - /// # Panics - /// - /// This panics when `replace_with.len()` does not equal the total number - /// of patterns that are matched by this automaton. 
- /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::{AhoCorasickBuilder, MatchKind}; - /// - /// let patterns = &["append", "appendage", "app"]; - /// let haystack = "append the app to the appendage"; - /// - /// let ac = AhoCorasickBuilder::new() - /// .match_kind(MatchKind::LeftmostFirst) - /// .build(patterns); - /// let result = ac.replace_all(haystack, &["x", "y", "z"]); - /// assert_eq!("x the z to the xage", result); - /// ``` - pub fn replace_all(&self, haystack: &str, replace_with: &[B]) -> String - where - B: AsRef, - { - assert_eq!( - replace_with.len(), - self.pattern_count(), - "replace_all requires a replacement for every pattern \ - in the automaton" - ); - let mut dst = String::with_capacity(haystack.len()); - self.replace_all_with(haystack, &mut dst, |mat, _, dst| { - dst.push_str(replace_with[mat.pattern()].as_ref()); - true - }); - dst - } - - /// Replace all matches using raw bytes with a corresponding value in the - /// `replace_with` slice given. Matches correspond to the same matches as - /// reported by [`find_iter`](struct.AhoCorasick.html#method.find_iter). - /// - /// Replacements are determined by the index of the matching pattern. - /// For example, if the pattern with index `2` is found, then it is - /// replaced by `replace_with[2]`. - /// - /// # Panics - /// - /// This panics when `replace_with.len()` does not equal the total number - /// of patterns that are matched by this automaton. 
- /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::{AhoCorasickBuilder, MatchKind}; - /// - /// let patterns = &["append", "appendage", "app"]; - /// let haystack = b"append the app to the appendage"; - /// - /// let ac = AhoCorasickBuilder::new() - /// .match_kind(MatchKind::LeftmostFirst) - /// .build(patterns); - /// let result = ac.replace_all_bytes(haystack, &["x", "y", "z"]); - /// assert_eq!(b"x the z to the xage".to_vec(), result); - /// ``` - pub fn replace_all_bytes( - &self, - haystack: &[u8], - replace_with: &[B], - ) -> Vec - where - B: AsRef<[u8]>, - { - assert_eq!( - replace_with.len(), - self.pattern_count(), - "replace_all_bytes requires a replacement for every pattern \ - in the automaton" - ); - let mut dst = Vec::with_capacity(haystack.len()); - self.replace_all_with_bytes(haystack, &mut dst, |mat, _, dst| { - dst.extend(replace_with[mat.pattern()].as_ref()); - true - }); - dst - } - - /// Replace all matches using a closure called on each match. - /// Matches correspond to the same matches as reported by - /// [`find_iter`](struct.AhoCorasick.html#method.find_iter). - /// - /// The closure accepts three parameters: the match found, the text of - /// the match and a string buffer with which to write the replaced text - /// (if any). If the closure returns `true`, then it continues to the next - /// match. If the closure returns `false`, then searching is stopped. 
- /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::{AhoCorasickBuilder, MatchKind}; - /// - /// let patterns = &["append", "appendage", "app"]; - /// let haystack = "append the app to the appendage"; - /// - /// let ac = AhoCorasickBuilder::new() - /// .match_kind(MatchKind::LeftmostFirst) - /// .build(patterns); - /// let mut result = String::new(); - /// ac.replace_all_with(haystack, &mut result, |mat, _, dst| { - /// dst.push_str(&mat.pattern().to_string()); - /// true - /// }); - /// assert_eq!("0 the 2 to the 0age", result); - /// ``` - /// - /// Stopping the replacement by returning `false` (continued from the - /// example above): - /// - /// ``` - /// # use aho_corasick::{AhoCorasickBuilder, MatchKind}; - /// # let patterns = &["append", "appendage", "app"]; - /// # let haystack = "append the app to the appendage"; - /// # let ac = AhoCorasickBuilder::new() - /// # .match_kind(MatchKind::LeftmostFirst) - /// # .build(patterns); - /// let mut result = String::new(); - /// ac.replace_all_with(haystack, &mut result, |mat, _, dst| { - /// dst.push_str(&mat.pattern().to_string()); - /// mat.pattern() != 2 - /// }); - /// assert_eq!("0 the 2 to the appendage", result); - /// ``` - pub fn replace_all_with( - &self, - haystack: &str, - dst: &mut String, - mut replace_with: F, - ) where - F: FnMut(&Match, &str, &mut String) -> bool, - { - let mut last_match = 0; - for mat in self.find_iter(haystack) { - dst.push_str(&haystack[last_match..mat.start()]); - last_match = mat.end(); - if !replace_with(&mat, &haystack[mat.start()..mat.end()], dst) { - break; - }; - } - dst.push_str(&haystack[last_match..]); - } - - /// Replace all matches using raw bytes with a closure called on each - /// match. Matches correspond to the same matches as reported by - /// [`find_iter`](struct.AhoCorasick.html#method.find_iter). 
- /// - /// The closure accepts three parameters: the match found, the text of - /// the match and a byte buffer with which to write the replaced text - /// (if any). If the closure returns `true`, then it continues to the next - /// match. If the closure returns `false`, then searching is stopped. - /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::{AhoCorasickBuilder, MatchKind}; - /// - /// let patterns = &["append", "appendage", "app"]; - /// let haystack = b"append the app to the appendage"; - /// - /// let ac = AhoCorasickBuilder::new() - /// .match_kind(MatchKind::LeftmostFirst) - /// .build(patterns); - /// let mut result = vec![]; - /// ac.replace_all_with_bytes(haystack, &mut result, |mat, _, dst| { - /// dst.extend(mat.pattern().to_string().bytes()); - /// true - /// }); - /// assert_eq!(b"0 the 2 to the 0age".to_vec(), result); - /// ``` - /// - /// Stopping the replacement by returning `false` (continued from the - /// example above): - /// - /// ``` - /// # use aho_corasick::{AhoCorasickBuilder, MatchKind}; - /// # let patterns = &["append", "appendage", "app"]; - /// # let haystack = b"append the app to the appendage"; - /// # let ac = AhoCorasickBuilder::new() - /// # .match_kind(MatchKind::LeftmostFirst) - /// # .build(patterns); - /// let mut result = vec![]; - /// ac.replace_all_with_bytes(haystack, &mut result, |mat, _, dst| { - /// dst.extend(mat.pattern().to_string().bytes()); - /// mat.pattern() != 2 - /// }); - /// assert_eq!(b"0 the 2 to the appendage".to_vec(), result); - /// ``` - pub fn replace_all_with_bytes( - &self, - haystack: &[u8], - dst: &mut Vec, - mut replace_with: F, - ) where - F: FnMut(&Match, &[u8], &mut Vec) -> bool, - { - let mut last_match = 0; - for mat in self.find_iter(haystack) { - dst.extend(&haystack[last_match..mat.start()]); - last_match = mat.end(); - if !replace_with(&mat, &haystack[mat.start()..mat.end()], dst) { - break; - }; - } - dst.extend(&haystack[last_match..]); - } - 
- /// Returns an iterator of non-overlapping matches in the given - /// stream. Matches correspond to the same matches as reported by - /// [`find_iter`](struct.AhoCorasick.html#method.find_iter). - /// - /// The matches yielded by this iterator use absolute position offsets in - /// the stream given, where the first byte has index `0`. Matches are - /// yieled until the stream is exhausted. - /// - /// Each item yielded by the iterator is an `io::Result`, where an - /// error is yielded if there was a problem reading from the reader given. - /// - /// When searching a stream, an internal buffer is used. Therefore, callers - /// should avoiding providing a buffered reader, if possible. - /// - /// Searching a stream requires that the automaton was built with - /// `MatchKind::Standard` semantics. If this automaton was constructed - /// with leftmost semantics, then this method will panic. To determine - /// whether this will panic at runtime, use the - /// [`AhoCorasick::supports_stream`](struct.AhoCorasick.html#method.supports_stream) - /// method. - /// - /// # Memory usage - /// - /// In general, searching streams will use a constant amount of memory for - /// its internal buffer. The one requirement is that the internal buffer - /// must be at least the size of the longest possible match. In most use - /// cases, the default buffer size will be much larger than any individual - /// match. - /// - /// # Panics - /// - /// This panics when `AhoCorasick::supports_stream` returns `false`. - /// That is, this panics when this automaton's match semantics are not - /// `MatchKind::Standard`. This restriction may be lifted in the future. 
- /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::AhoCorasick; - /// - /// # fn example() -> Result<(), ::std::io::Error> { - /// let patterns = &["append", "appendage", "app"]; - /// let haystack = "append the app to the appendage"; - /// - /// let ac = AhoCorasick::new(patterns); - /// let mut matches = vec![]; - /// for result in ac.stream_find_iter(haystack.as_bytes()) { - /// let mat = result?; - /// matches.push(mat.pattern()); - /// } - /// assert_eq!(vec![2, 2, 2], matches); - /// # Ok(()) }; example().unwrap() - /// ``` - pub fn stream_find_iter<'a, R: io::Read>( - &'a self, - rdr: R, - ) -> StreamFindIter<'a, R, S> { - StreamFindIter::new(self, rdr) - } - - /// Search for and replace all matches of this automaton in - /// the given reader, and write the replacements to the given - /// writer. Matches correspond to the same matches as reported by - /// [`find_iter`](struct.AhoCorasick.html#method.find_iter). - /// - /// Replacements are determined by the index of the matching pattern. - /// For example, if the pattern with index `2` is found, then it is - /// replaced by `replace_with[2]`. - /// - /// After all matches are replaced, the writer is _not_ flushed. - /// - /// If there was a problem reading from the given reader or writing to the - /// given writer, then the corresponding `io::Error` is returned and all - /// replacement is stopped. - /// - /// When searching a stream, an internal buffer is used. Therefore, callers - /// should avoiding providing a buffered reader, if possible. However, - /// callers may want to provide a buffered writer. - /// - /// Searching a stream requires that the automaton was built with - /// `MatchKind::Standard` semantics. If this automaton was constructed - /// with leftmost semantics, then this method will panic. To determine - /// whether this will panic at runtime, use the - /// [`AhoCorasick::supports_stream`](struct.AhoCorasick.html#method.supports_stream) - /// method. 
- /// - /// # Memory usage - /// - /// In general, searching streams will use a constant amount of memory for - /// its internal buffer. The one requirement is that the internal buffer - /// must be at least the size of the longest possible match. In most use - /// cases, the default buffer size will be much larger than any individual - /// match. - /// - /// # Panics - /// - /// This panics when `AhoCorasick::supports_stream` returns `false`. - /// That is, this panics when this automaton's match semantics are not - /// `MatchKind::Standard`. This restriction may be lifted in the future. - /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::AhoCorasick; - /// - /// # fn example() -> Result<(), ::std::io::Error> { - /// let patterns = &["fox", "brown", "quick"]; - /// let haystack = "The quick brown fox."; - /// let replace_with = &["sloth", "grey", "slow"]; - /// - /// let ac = AhoCorasick::new(patterns); - /// let mut result = vec![]; - /// ac.stream_replace_all(haystack.as_bytes(), &mut result, replace_with)?; - /// assert_eq!(b"The slow grey sloth.".to_vec(), result); - /// # Ok(()) }; example().unwrap() - /// ``` - pub fn stream_replace_all( - &self, - rdr: R, - wtr: W, - replace_with: &[B], - ) -> io::Result<()> - where - R: io::Read, - W: io::Write, - B: AsRef<[u8]>, - { - assert_eq!( - replace_with.len(), - self.pattern_count(), - "stream_replace_all requires a replacement for every pattern \ - in the automaton" - ); - self.stream_replace_all_with(rdr, wtr, |mat, _, wtr| { - wtr.write_all(replace_with[mat.pattern()].as_ref()) - }) - } - - /// Search the given reader and replace all matches of this automaton - /// using the given closure. The result is written to the given - /// writer. Matches correspond to the same matches as reported by - /// [`find_iter`](struct.AhoCorasick.html#method.find_iter). 
- /// - /// The closure accepts three parameters: the match found, the text of - /// the match and the writer with which to write the replaced text (if any). - /// - /// After all matches are replaced, the writer is _not_ flushed. - /// - /// If there was a problem reading from the given reader or writing to the - /// given writer, then the corresponding `io::Error` is returned and all - /// replacement is stopped. - /// - /// When searching a stream, an internal buffer is used. Therefore, callers - /// should avoiding providing a buffered reader, if possible. However, - /// callers may want to provide a buffered writer. - /// - /// Searching a stream requires that the automaton was built with - /// `MatchKind::Standard` semantics. If this automaton was constructed - /// with leftmost semantics, then this method will panic. To determine - /// whether this will panic at runtime, use the - /// [`AhoCorasick::supports_stream`](struct.AhoCorasick.html#method.supports_stream) - /// method. - /// - /// # Memory usage - /// - /// In general, searching streams will use a constant amount of memory for - /// its internal buffer. The one requirement is that the internal buffer - /// must be at least the size of the longest possible match. In most use - /// cases, the default buffer size will be much larger than any individual - /// match. - /// - /// # Panics - /// - /// This panics when `AhoCorasick::supports_stream` returns `false`. - /// That is, this panics when this automaton's match semantics are not - /// `MatchKind::Standard`. This restriction may be lifted in the future. 
- /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use std::io::Write; - /// use aho_corasick::AhoCorasick; - /// - /// # fn example() -> Result<(), ::std::io::Error> { - /// let patterns = &["fox", "brown", "quick"]; - /// let haystack = "The quick brown fox."; - /// - /// let ac = AhoCorasick::new(patterns); - /// let mut result = vec![]; - /// ac.stream_replace_all_with( - /// haystack.as_bytes(), - /// &mut result, - /// |mat, _, wtr| { - /// wtr.write_all(mat.pattern().to_string().as_bytes()) - /// }, - /// )?; - /// assert_eq!(b"The 2 1 0.".to_vec(), result); - /// # Ok(()) }; example().unwrap() - /// ``` - pub fn stream_replace_all_with( - &self, - rdr: R, - mut wtr: W, - mut replace_with: F, - ) -> io::Result<()> - where - R: io::Read, - W: io::Write, - F: FnMut(&Match, &[u8], &mut W) -> io::Result<()>, - { - let mut it = StreamChunkIter::new(self, rdr); - while let Some(result) = it.next() { - let chunk = result?; - match chunk { - StreamChunk::NonMatch { bytes, .. } => { - wtr.write_all(bytes)?; - } - StreamChunk::Match { bytes, mat } => { - replace_with(&mat, bytes, &mut wtr)?; - } - } - } - Ok(()) - } - - /// Returns the match kind used by this automaton. - /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::{AhoCorasick, MatchKind}; - /// - /// let ac = AhoCorasick::new(&[ - /// "foo", "bar", "quux", "baz", - /// ]); - /// assert_eq!(&MatchKind::Standard, ac.match_kind()); - /// ``` - pub fn match_kind(&self) -> &MatchKind { - self.imp.match_kind() - } - - /// Returns the length of the longest pattern matched by this automaton. 
- /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::AhoCorasick; - /// - /// let ac = AhoCorasick::new(&[ - /// "foo", "bar", "quux", "baz", - /// ]); - /// assert_eq!(4, ac.max_pattern_len()); - /// ``` - pub fn max_pattern_len(&self) -> usize { - self.imp.max_pattern_len() - } - - /// Return the total number of patterns matched by this automaton. - /// - /// This includes patterns that may never participate in a match. For - /// example, if - /// [`MatchKind::LeftmostFirst`](enum.MatchKind.html#variant.LeftmostFirst) - /// match semantics are used, and the patterns `Sam` and `Samwise` were - /// used to build the automaton, then `Samwise` can never participate in a - /// match because `Sam` will always take priority. - /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::AhoCorasick; - /// - /// let ac = AhoCorasick::new(&[ - /// "foo", "bar", "baz", - /// ]); - /// assert_eq!(3, ac.pattern_count()); - /// ``` - pub fn pattern_count(&self) -> usize { - self.imp.pattern_count() - } - - /// Returns true if and only if this automaton supports reporting - /// overlapping matches. - /// - /// If this returns false and overlapping matches are requested, then it - /// will result in a panic. - /// - /// Since leftmost matching is inherently incompatible with overlapping - /// matches, only - /// [`MatchKind::Standard`](enum.MatchKind.html#variant.Standard) - /// supports overlapping matches. This is unlikely to change in the future. 
- /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::{AhoCorasickBuilder, MatchKind}; - /// - /// let ac = AhoCorasickBuilder::new() - /// .match_kind(MatchKind::Standard) - /// .build(&["foo", "bar", "baz"]); - /// assert!(ac.supports_overlapping()); - /// - /// let ac = AhoCorasickBuilder::new() - /// .match_kind(MatchKind::LeftmostFirst) - /// .build(&["foo", "bar", "baz"]); - /// assert!(!ac.supports_overlapping()); - /// ``` - pub fn supports_overlapping(&self) -> bool { - self.match_kind.supports_overlapping() - } - - /// Returns true if and only if this automaton supports stream searching. - /// - /// If this returns false and stream searching (or replacing) is attempted, - /// then it will result in a panic. - /// - /// Currently, only - /// [`MatchKind::Standard`](enum.MatchKind.html#variant.Standard) - /// supports streaming. This may be expanded in the future. - /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::{AhoCorasickBuilder, MatchKind}; - /// - /// let ac = AhoCorasickBuilder::new() - /// .match_kind(MatchKind::Standard) - /// .build(&["foo", "bar", "baz"]); - /// assert!(ac.supports_stream()); - /// - /// let ac = AhoCorasickBuilder::new() - /// .match_kind(MatchKind::LeftmostFirst) - /// .build(&["foo", "bar", "baz"]); - /// assert!(!ac.supports_stream()); - /// ``` - pub fn supports_stream(&self) -> bool { - self.match_kind.supports_stream() - } - - /// Returns the approximate total amount of heap used by this automaton, in - /// units of bytes. 
- /// - /// # Examples - /// - /// This example shows the difference in heap usage between a few - /// configurations: - /// - /// ```ignore - /// use aho_corasick::{AhoCorasickBuilder, MatchKind}; - /// - /// let ac = AhoCorasickBuilder::new() - /// .dfa(false) // default - /// .build(&["foo", "bar", "baz"]); - /// assert_eq!(10_336, ac.heap_bytes()); - /// - /// let ac = AhoCorasickBuilder::new() - /// .dfa(false) // default - /// .ascii_case_insensitive(true) - /// .build(&["foo", "bar", "baz"]); - /// assert_eq!(10_384, ac.heap_bytes()); - /// - /// let ac = AhoCorasickBuilder::new() - /// .dfa(true) - /// .ascii_case_insensitive(true) - /// .build(&["foo", "bar", "baz"]); - /// assert_eq!(1_248, ac.heap_bytes()); - /// ``` - pub fn heap_bytes(&self) -> usize { - match self.imp { - Imp::NFA(ref nfa) => nfa.heap_bytes(), - Imp::DFA(ref dfa) => dfa.heap_bytes(), - } - } -} - -/// The internal implementation of Aho-Corasick, which is either an NFA or -/// a DFA. The NFA is slower but uses less memory. The DFA is faster but uses -/// more memory. -#[derive(Clone, Debug)] -enum Imp { - NFA(NFA), - DFA(DFA), -} - -impl Imp { - /// Returns the type of match semantics implemented by this automaton. - fn match_kind(&self) -> &MatchKind { - match *self { - Imp::NFA(ref nfa) => nfa.match_kind(), - Imp::DFA(ref dfa) => dfa.match_kind(), - } - } - - /// Returns the identifier of the start state. - fn start_state(&self) -> S { - match *self { - Imp::NFA(ref nfa) => nfa.start_state(), - Imp::DFA(ref dfa) => dfa.start_state(), - } - } - - /// The length, in bytes, of the longest pattern in this automaton. This - /// information is useful for maintaining correct buffer sizes when - /// searching on streams. - fn max_pattern_len(&self) -> usize { - match *self { - Imp::NFA(ref nfa) => nfa.max_pattern_len(), - Imp::DFA(ref dfa) => dfa.max_pattern_len(), - } - } - - /// The total number of patterns added to this automaton. This includes - /// patterns that may never match. 
The maximum matching pattern that can be - /// reported is exactly one less than this number. - fn pattern_count(&self) -> usize { - match *self { - Imp::NFA(ref nfa) => nfa.pattern_count(), - Imp::DFA(ref dfa) => dfa.pattern_count(), - } - } - - /// Returns the prefilter object, if one exists, for the underlying - /// automaton. - fn prefilter(&self) -> Option<&dyn Prefilter> { - match *self { - Imp::NFA(ref nfa) => nfa.prefilter(), - Imp::DFA(ref dfa) => dfa.prefilter(), - } - } - - /// Returns true if and only if we should attempt to use a prefilter. - fn use_prefilter(&self) -> bool { - let p = match self.prefilter() { - None => return false, - Some(p) => p, - }; - !p.looks_for_non_start_of_match() - } - - #[inline(always)] - fn overlapping_find_at( - &self, - prestate: &mut PrefilterState, - haystack: &[u8], - at: usize, - state_id: &mut S, - match_index: &mut usize, - ) -> Option { - match *self { - Imp::NFA(ref nfa) => nfa.overlapping_find_at( - prestate, - haystack, - at, - state_id, - match_index, - ), - Imp::DFA(ref dfa) => dfa.overlapping_find_at( - prestate, - haystack, - at, - state_id, - match_index, - ), - } - } - - #[inline(always)] - fn earliest_find_at( - &self, - prestate: &mut PrefilterState, - haystack: &[u8], - at: usize, - state_id: &mut S, - ) -> Option { - match *self { - Imp::NFA(ref nfa) => { - nfa.earliest_find_at(prestate, haystack, at, state_id) - } - Imp::DFA(ref dfa) => { - dfa.earliest_find_at(prestate, haystack, at, state_id) - } - } - } - - #[inline(always)] - fn find_at_no_state( - &self, - prestate: &mut PrefilterState, - haystack: &[u8], - at: usize, - ) -> Option { - match *self { - Imp::NFA(ref nfa) => nfa.find_at_no_state(prestate, haystack, at), - Imp::DFA(ref dfa) => dfa.find_at_no_state(prestate, haystack, at), - } - } -} - -/// An iterator of non-overlapping matches in a particular haystack. -/// -/// This iterator yields matches according to the -/// [`MatchKind`](enum.MatchKind.html) -/// used by this automaton. 
-/// -/// This iterator is constructed via the -/// [`AhoCorasick::find_iter`](struct.AhoCorasick.html#method.find_iter) -/// method. -/// -/// The type variable `S` refers to the representation used for state -/// identifiers. (By default, this is `usize`.) -/// -/// The lifetime `'a` refers to the lifetime of the `AhoCorasick` automaton. -/// -/// The lifetime `'b` refers to the lifetime of the haystack being searched. -#[derive(Debug)] -pub struct FindIter<'a, 'b, S: StateID> { - fsm: &'a Imp, - prestate: PrefilterState, - haystack: &'b [u8], - pos: usize, -} - -impl<'a, 'b, S: StateID> FindIter<'a, 'b, S> { - fn new(ac: &'a AhoCorasick, haystack: &'b [u8]) -> FindIter<'a, 'b, S> { - let prestate = PrefilterState::new(ac.max_pattern_len()); - FindIter { fsm: &ac.imp, prestate, haystack, pos: 0 } - } -} - -impl<'a, 'b, S: StateID> Iterator for FindIter<'a, 'b, S> { - type Item = Match; - - fn next(&mut self) -> Option { - if self.pos > self.haystack.len() { - return None; - } - let result = self.fsm.find_at_no_state( - &mut self.prestate, - self.haystack, - self.pos, - ); - let mat = match result { - None => return None, - Some(mat) => mat, - }; - if mat.end() == self.pos { - // If the automaton can match the empty string and if we found an - // empty match, then we need to forcefully move the position. - self.pos += 1; - } else { - self.pos = mat.end(); - } - Some(mat) - } -} - -/// An iterator of overlapping matches in a particular haystack. -/// -/// This iterator will report all possible matches in a particular haystack, -/// even when the matches overlap. -/// -/// This iterator is constructed via the -/// [`AhoCorasick::find_overlapping_iter`](struct.AhoCorasick.html#method.find_overlapping_iter) -/// method. -/// -/// The type variable `S` refers to the representation used for state -/// identifiers. (By default, this is `usize`.) -/// -/// The lifetime `'a` refers to the lifetime of the `AhoCorasick` automaton. 
-/// -/// The lifetime `'b` refers to the lifetime of the haystack being searched. -#[derive(Debug)] -pub struct FindOverlappingIter<'a, 'b, S: StateID> { - fsm: &'a Imp, - prestate: PrefilterState, - haystack: &'b [u8], - pos: usize, - state_id: S, - match_index: usize, -} - -impl<'a, 'b, S: StateID> FindOverlappingIter<'a, 'b, S> { - fn new( - ac: &'a AhoCorasick, - haystack: &'b [u8], - ) -> FindOverlappingIter<'a, 'b, S> { - assert!( - ac.supports_overlapping(), - "automaton does not support overlapping searches" - ); - let prestate = PrefilterState::new(ac.max_pattern_len()); - FindOverlappingIter { - fsm: &ac.imp, - prestate, - haystack, - pos: 0, - state_id: ac.imp.start_state(), - match_index: 0, - } - } -} - -impl<'a, 'b, S: StateID> Iterator for FindOverlappingIter<'a, 'b, S> { - type Item = Match; - - fn next(&mut self) -> Option { - let result = self.fsm.overlapping_find_at( - &mut self.prestate, - self.haystack, - self.pos, - &mut self.state_id, - &mut self.match_index, - ); - match result { - None => return None, - Some(m) => { - self.pos = m.end(); - Some(m) - } - } - } -} - -/// An iterator that reports Aho-Corasick matches in a stream. -/// -/// This iterator yields elements of type `io::Result`, where an error -/// is reported if there was a problem reading from the underlying stream. -/// The iterator terminates only when the underlying stream reaches `EOF`. -/// -/// This iterator is constructed via the -/// [`AhoCorasick::stream_find_iter`](struct.AhoCorasick.html#method.stream_find_iter) -/// method. -/// -/// The type variable `R` refers to the `io::Read` stream that is being read -/// from. -/// -/// The type variable `S` refers to the representation used for state -/// identifiers. (By default, this is `usize`.) -/// -/// The lifetime `'a` refers to the lifetime of the `AhoCorasick` automaton. 
-#[derive(Debug)] -pub struct StreamFindIter<'a, R, S: StateID> { - it: StreamChunkIter<'a, R, S>, -} - -impl<'a, R: io::Read, S: StateID> StreamFindIter<'a, R, S> { - fn new(ac: &'a AhoCorasick, rdr: R) -> StreamFindIter<'a, R, S> { - StreamFindIter { it: StreamChunkIter::new(ac, rdr) } - } -} - -impl<'a, R: io::Read, S: StateID> Iterator for StreamFindIter<'a, R, S> { - type Item = io::Result; - - fn next(&mut self) -> Option> { - loop { - match self.it.next() { - None => return None, - Some(Err(err)) => return Some(Err(err)), - Some(Ok(StreamChunk::NonMatch { .. })) => {} - Some(Ok(StreamChunk::Match { mat, .. })) => { - return Some(Ok(mat)); - } - } - } - } -} - -/// An iterator over chunks in an underlying reader. Each chunk either -/// corresponds to non-matching bytes or matching bytes, but all bytes from -/// the underlying reader are reported in sequence. There may be an arbitrary -/// number of non-matching chunks before seeing a matching chunk. -/// -/// N.B. This does not actually implement Iterator because we need to borrow -/// from the underlying reader. But conceptually, it's still an iterator. -#[derive(Debug)] -struct StreamChunkIter<'a, R, S: StateID> { - /// The AC automaton. - fsm: &'a Imp, - /// State associated with this automaton's prefilter. It is a heuristic - /// for stopping the prefilter if it's deemed ineffective. - prestate: PrefilterState, - /// The source of bytes we read from. - rdr: R, - /// A fixed size buffer. This is what we actually search. There are some - /// invariants around the buffer's size, namely, it must be big enough to - /// contain the longest possible match. - buf: Buffer, - /// The ID of the FSM state we're currently in. - state_id: S, - /// The current position at which to start the next search in `buf`. - search_pos: usize, - /// The absolute position of `search_pos`, where `0` corresponds to the - /// position of the first byte read from `rdr`. 
- absolute_pos: usize, - /// The ending position of the last StreamChunk that was returned to the - /// caller. This position is used to determine whether we need to emit - /// non-matching bytes before emitting a match. - report_pos: usize, - /// A match that should be reported on the next call. - pending_match: Option, - /// Enabled only when the automaton can match the empty string. When - /// enabled, we need to execute one final search after consuming the - /// reader to find the trailing empty match. - has_empty_match_at_end: bool, -} - -/// A single chunk yielded by the stream chunk iterator. -/// -/// The `'r` lifetime refers to the lifetime of the stream chunk iterator. -#[derive(Debug)] -enum StreamChunk<'r> { - /// A chunk that does not contain any matches. - NonMatch { bytes: &'r [u8] }, - /// A chunk that precisely contains a match. - Match { bytes: &'r [u8], mat: Match }, -} - -impl<'a, R: io::Read, S: StateID> StreamChunkIter<'a, R, S> { - fn new(ac: &'a AhoCorasick, rdr: R) -> StreamChunkIter<'a, R, S> { - assert!( - ac.supports_stream(), - "stream searching is only supported for Standard match semantics" - ); - - let prestate = if ac.imp.use_prefilter() { - PrefilterState::new(ac.max_pattern_len()) - } else { - PrefilterState::disabled() - }; - let buf = Buffer::new(ac.imp.max_pattern_len()); - let state_id = ac.imp.start_state(); - StreamChunkIter { - fsm: &ac.imp, - prestate, - rdr, - buf, - state_id, - absolute_pos: 0, - report_pos: 0, - search_pos: 0, - pending_match: None, - has_empty_match_at_end: ac.is_match(""), - } - } - - fn next(&mut self) -> Option> { - loop { - if let Some(mut mat) = self.pending_match.take() { - let bytes = &self.buf.buffer()[mat.start()..mat.end()]; - self.report_pos = mat.end(); - mat = mat.increment(self.absolute_pos); - return Some(Ok(StreamChunk::Match { bytes, mat })); - } - if self.search_pos >= self.buf.len() { - if let Some(end) = self.unreported() { - let bytes = &self.buf.buffer()[self.report_pos..end]; - 
self.report_pos = end; - return Some(Ok(StreamChunk::NonMatch { bytes })); - } - if self.buf.len() >= self.buf.min_buffer_len() { - // This is the point at which we roll our buffer, which we - // only do if our buffer has at least the minimum amount of - // bytes in it. Before rolling, we update our various - // positions to be consistent with the buffer after it has - // been rolled. - - self.report_pos -= - self.buf.len() - self.buf.min_buffer_len(); - self.absolute_pos += - self.search_pos - self.buf.min_buffer_len(); - self.search_pos = self.buf.min_buffer_len(); - self.buf.roll(); - } - match self.buf.fill(&mut self.rdr) { - Err(err) => return Some(Err(err)), - Ok(false) => { - // We've hit EOF, but if there are still some - // unreported bytes remaining, return them now. - if self.report_pos < self.buf.len() { - let bytes = &self.buf.buffer()[self.report_pos..]; - self.report_pos = self.buf.len(); - - let chunk = StreamChunk::NonMatch { bytes }; - return Some(Ok(chunk)); - } else { - // We've reported everything, but there might still - // be a match at the very last position. - if !self.has_empty_match_at_end { - return None; - } - // fallthrough for another search to get trailing - // empty matches - self.has_empty_match_at_end = false; - } - } - Ok(true) => {} - } - } - let result = self.fsm.earliest_find_at( - &mut self.prestate, - self.buf.buffer(), - self.search_pos, - &mut self.state_id, - ); - match result { - None => { - self.search_pos = self.buf.len(); - } - Some(mat) => { - self.state_id = self.fsm.start_state(); - if mat.end() == self.search_pos { - // If the automaton can match the empty string and if - // we found an empty match, then we need to forcefully - // move the position. 
- self.search_pos += 1; - } else { - self.search_pos = mat.end(); - } - self.pending_match = Some(mat.clone()); - if self.report_pos < mat.start() { - let bytes = - &self.buf.buffer()[self.report_pos..mat.start()]; - self.report_pos = mat.start(); - - let chunk = StreamChunk::NonMatch { bytes }; - return Some(Ok(chunk)); - } - } - } - } - } - - fn unreported(&self) -> Option { - let end = self.search_pos.saturating_sub(self.buf.min_buffer_len()); - if self.report_pos < end { - Some(end) - } else { - None - } - } -} - -/// A builder for configuring an Aho-Corasick automaton. -#[derive(Clone, Debug)] -pub struct AhoCorasickBuilder { - nfa_builder: nfa::Builder, - dfa_builder: dfa::Builder, - dfa: bool, -} - -impl Default for AhoCorasickBuilder { - fn default() -> AhoCorasickBuilder { - AhoCorasickBuilder::new() - } -} - -impl AhoCorasickBuilder { - /// Create a new builder for configuring an Aho-Corasick automaton. - /// - /// If you don't need fine grained configuration or aren't sure which knobs - /// to set, try using - /// [`AhoCorasick::new_auto_configured`](struct.AhoCorasick.html#method.new_auto_configured) - /// instead. - pub fn new() -> AhoCorasickBuilder { - AhoCorasickBuilder { - nfa_builder: nfa::Builder::new(), - dfa_builder: dfa::Builder::new(), - dfa: false, - } - } - - /// Build an Aho-Corasick automaton using the configuration set on this - /// builder. - /// - /// A builder may be reused to create more automatons. - /// - /// This method will use the default for representing internal state - /// identifiers, which is `usize`. This guarantees that building the - /// automaton will succeed and is generally a good default, but can make - /// the size of the automaton 2-8 times bigger than it needs to be, - /// depending on your target platform. 
- /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::AhoCorasickBuilder; - /// - /// let patterns = &["foo", "bar", "baz"]; - /// let ac = AhoCorasickBuilder::new() - /// .build(patterns); - /// assert_eq!(Some(1), ac.find("xxx bar xxx").map(|m| m.pattern())); - /// ``` - pub fn build(&self, patterns: I) -> AhoCorasick - where - I: IntoIterator, - P: AsRef<[u8]>, - { - // The builder only returns an error if the chosen state ID - // representation is too small to fit all of the given patterns. In - // this case, since we fix the representation to usize, it will always - // work because it's impossible to overflow usize since the underlying - // storage would OOM long before that happens. - self.build_with_size::(patterns) - .expect("usize state ID type should always work") - } - - /// Build an Aho-Corasick automaton using the configuration set on this - /// builder with a specific state identifier representation. This only has - /// an effect when the `dfa` option is enabled. - /// - /// Generally, the choices for a state identifier representation are - /// `u8`, `u16`, `u32`, `u64` or `usize`, with `usize` being the default. - /// The advantage of choosing a smaller state identifier representation - /// is that the automaton produced will be smaller. This might be - /// beneficial for just generally using less space, or might even allow it - /// to fit more of the automaton in your CPU's cache, leading to overall - /// better search performance. - /// - /// Unlike the standard `build` method, this can report an error if the - /// state identifier representation cannot support the size of the - /// automaton. - /// - /// Note that the state identifier representation is determined by the - /// `S` type variable. This requires a type hint of some sort, either - /// by specifying the return type or using the turbofish, e.g., - /// `build_with_size::(...)`. 
- /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::{AhoCorasick, AhoCorasickBuilder}; - /// - /// # fn example() -> Result<(), ::aho_corasick::Error> { - /// let patterns = &["foo", "bar", "baz"]; - /// let ac: AhoCorasick = AhoCorasickBuilder::new() - /// .build_with_size(patterns)?; - /// assert_eq!(Some(1), ac.find("xxx bar xxx").map(|m| m.pattern())); - /// # Ok(()) }; example().unwrap() - /// ``` - /// - /// Or alternatively, with turbofish: - /// - /// ``` - /// use aho_corasick::AhoCorasickBuilder; - /// - /// # fn example() -> Result<(), ::aho_corasick::Error> { - /// let patterns = &["foo", "bar", "baz"]; - /// let ac = AhoCorasickBuilder::new() - /// .build_with_size::(patterns)?; - /// assert_eq!(Some(1), ac.find("xxx bar xxx").map(|m| m.pattern())); - /// # Ok(()) }; example().unwrap() - /// ``` - pub fn build_with_size( - &self, - patterns: I, - ) -> Result> - where - S: StateID, - I: IntoIterator, - P: AsRef<[u8]>, - { - let nfa = self.nfa_builder.build(patterns)?; - let match_kind = nfa.match_kind().clone(); - let imp = if self.dfa { - let dfa = self.dfa_builder.build(&nfa)?; - Imp::DFA(dfa) - } else { - Imp::NFA(nfa) - }; - Ok(AhoCorasick { imp, match_kind }) - } - - /// Automatically configure the settings on this builder according to the - /// patterns that will be used to construct the automaton. - /// - /// The idea here is to balance space and time automatically. That is, when - /// searching a small number of patterns, this will attempt to use the - /// fastest possible configuration since the total space required will be - /// small anyway. As the number of patterns grows, this will fall back to - /// slower configurations that use less space. - /// - /// This is guaranteed to never set `match_kind`, but any other option may - /// be overridden. 
- /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::AhoCorasickBuilder; - /// - /// let patterns = &["foo", "bar", "baz"]; - /// let ac = AhoCorasickBuilder::new() - /// .auto_configure(patterns) - /// .build(patterns); - /// assert_eq!(Some(1), ac.find("xxx bar xxx").map(|m| m.pattern())); - /// ``` - pub fn auto_configure>( - &mut self, - patterns: &[B], - ) -> &mut AhoCorasickBuilder { - // N.B. Currently we only use the length of `patterns` to make a - // decision here, and could therefore ask for an `ExactSizeIterator` - // instead. But it's conceivable that we might adapt this to look at - // the total number of bytes, which would requires a second pass. - // - // The logic here is fairly rudimentary at the moment, but probably - // OK. The idea here is to use the fastest thing possible for a small - // number of patterns. That is, a DFA with no byte classes, since byte - // classes require an extra indirection for every byte searched. With a - // moderate number of patterns, we still want a DFA, but save on both - // space and compilation time by enabling byte classes. Finally, fall - // back to the slower but smaller NFA. - if patterns.len() <= 100 { - // N.B. Using byte classes can actually be faster by improving - // locality, but this only really applies for multi-megabyte - // automata (i.e., automata that don't fit in your CPU's cache). - self.dfa(true); - } else if patterns.len() <= 5000 { - self.dfa(true); - } - self - } - - /// Set the desired match semantics. - /// - /// The default is `MatchKind::Standard`, which corresponds to the match - /// semantics supported by the standard textbook description of the - /// Aho-Corasick algorithm. Namely, matches are reported as soon as they - /// are found. Moreover, this is the only way to get overlapping matches - /// or do stream searching. 
- /// - /// The other kinds of match semantics that are supported are - /// `MatchKind::LeftmostFirst` and `MatchKind::LeftmostLongest`. The former - /// corresponds to the match you would get if you were to try to match - /// each pattern at each position in the haystack in the same order that - /// you give to the automaton. That is, it returns the leftmost match - /// corresponding the earliest pattern given to the automaton. The latter - /// corresponds to finding the longest possible match among all leftmost - /// matches. - /// - /// For more details on match semantics, see the - /// [documentation for `MatchKind`](enum.MatchKind.html). - /// - /// # Examples - /// - /// In these examples, we demonstrate the differences between match - /// semantics for a particular set of patterns in a specific order: - /// `b`, `abc`, `abcd`. - /// - /// Standard semantics: - /// - /// ``` - /// use aho_corasick::{AhoCorasickBuilder, MatchKind}; - /// - /// let patterns = &["b", "abc", "abcd"]; - /// let haystack = "abcd"; - /// - /// let ac = AhoCorasickBuilder::new() - /// .match_kind(MatchKind::Standard) // default, not necessary - /// .build(patterns); - /// let mat = ac.find(haystack).expect("should have a match"); - /// assert_eq!("b", &haystack[mat.start()..mat.end()]); - /// ``` - /// - /// Leftmost-first semantics: - /// - /// ``` - /// use aho_corasick::{AhoCorasickBuilder, MatchKind}; - /// - /// let patterns = &["b", "abc", "abcd"]; - /// let haystack = "abcd"; - /// - /// let ac = AhoCorasickBuilder::new() - /// .match_kind(MatchKind::LeftmostFirst) - /// .build(patterns); - /// let mat = ac.find(haystack).expect("should have a match"); - /// assert_eq!("abc", &haystack[mat.start()..mat.end()]); - /// ``` - /// - /// Leftmost-longest semantics: - /// - /// ``` - /// use aho_corasick::{AhoCorasickBuilder, MatchKind}; - /// - /// let patterns = &["b", "abc", "abcd"]; - /// let haystack = "abcd"; - /// - /// let ac = AhoCorasickBuilder::new() - /// 
.match_kind(MatchKind::LeftmostLongest) - /// .build(patterns); - /// let mat = ac.find(haystack).expect("should have a match"); - /// assert_eq!("abcd", &haystack[mat.start()..mat.end()]); - /// ``` - pub fn match_kind(&mut self, kind: MatchKind) -> &mut AhoCorasickBuilder { - self.nfa_builder.match_kind(kind); - self - } - - /// Enable anchored mode, which requires all matches to start at the - /// first position in a haystack. - /// - /// This option is disabled by default. - /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::AhoCorasickBuilder; - /// - /// let patterns = &["foo", "bar"]; - /// let haystack = "foobar"; - /// - /// let ac = AhoCorasickBuilder::new() - /// .anchored(true) - /// .build(patterns); - /// assert_eq!(1, ac.find_iter(haystack).count()); - /// ``` - /// - /// When searching for overlapping matches, all matches that start at - /// the beginning of a haystack will be reported: - /// - /// ``` - /// use aho_corasick::AhoCorasickBuilder; - /// - /// let patterns = &["foo", "foofoo"]; - /// let haystack = "foofoo"; - /// - /// let ac = AhoCorasickBuilder::new() - /// .anchored(true) - /// .build(patterns); - /// assert_eq!(2, ac.find_overlapping_iter(haystack).count()); - /// // A non-anchored search would return 3 matches. - /// ``` - pub fn anchored(&mut self, yes: bool) -> &mut AhoCorasickBuilder { - self.nfa_builder.anchored(yes); - self - } - - /// Enable ASCII-aware case insensitive matching. - /// - /// When this option is enabled, searching will be performed without - /// respect to case for ASCII letters (`a-z` and `A-Z`) only. - /// - /// Enabling this option does not change the search algorithm, but it may - /// increase the size of the automaton. - /// - /// **NOTE:** It is unlikely that support for Unicode case folding will - /// be added in the future. The ASCII case works via a simple hack to the - /// underlying automaton, but full Unicode handling requires a fair bit of - /// sophistication. 
If you do need Unicode handling, you might consider - /// using the [`regex` crate](https://docs.rs/regex) or the lower level - /// [`regex-automata` crate](https://docs.rs/regex-automata). - /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::AhoCorasickBuilder; - /// - /// let patterns = &["FOO", "bAr", "BaZ"]; - /// let haystack = "foo bar baz"; - /// - /// let ac = AhoCorasickBuilder::new() - /// .ascii_case_insensitive(true) - /// .build(patterns); - /// assert_eq!(3, ac.find_iter(haystack).count()); - /// ``` - pub fn ascii_case_insensitive( - &mut self, - yes: bool, - ) -> &mut AhoCorasickBuilder { - self.nfa_builder.ascii_case_insensitive(yes); - self - } - - /// Set the limit on how many NFA states use a dense representation for - /// their transitions. - /// - /// A dense representation uses more space, but supports faster access to - /// transitions at search time. Thus, this setting permits the control of a - /// space vs time trade off when using the NFA variant of Aho-Corasick. - /// - /// This limit is expressed in terms of the depth of a state, i.e., the - /// number of transitions from the starting state of the NFA. The idea is - /// that most of the time searching will be spent near the starting state - /// of the automaton, so states near the start state should use a dense - /// representation. States further away from the start state would then use - /// a sparse representation, which uses less space but is slower to access - /// transitions at search time. - /// - /// By default, this is set to a low but non-zero number. - /// - /// This setting has no effect if the `dfa` option is enabled. - pub fn dense_depth(&mut self, depth: usize) -> &mut AhoCorasickBuilder { - self.nfa_builder.dense_depth(depth); - self - } - - /// Compile the standard Aho-Corasick automaton into a deterministic finite - /// automaton (DFA). 
- /// - /// When this is disabled (which is the default), then a non-deterministic - /// finite automaton (NFA) is used instead. - /// - /// The main benefit to a DFA is that it can execute searches more quickly - /// than a NFA (perhaps 2-4 times as fast). The main drawback is that the - /// DFA uses more space and can take much longer to build. - /// - /// Enabling this option does not change the time complexity for - /// constructing the Aho-Corasick automaton (which is `O(p)` where - /// `p` is the total number of patterns being compiled). Enabling this - /// option does however reduce the time complexity of non-overlapping - /// searches from `O(n + p)` to `O(n)`, where `n` is the length of the - /// haystack. - /// - /// In general, it's a good idea to enable this if you're searching a - /// small number of fairly short patterns (~1000), or if you want the - /// fastest possible search without regard to compilation time or space - /// usage. - pub fn dfa(&mut self, yes: bool) -> &mut AhoCorasickBuilder { - self.dfa = yes; - self - } - - /// Enable heuristic prefilter optimizations. - /// - /// When enabled, searching will attempt to quickly skip to match - /// candidates using specialized literal search routines. A prefilter - /// cannot always be used, and is generally treated as a heuristic. It - /// can be useful to disable this if the prefilter is observed to be - /// sub-optimal for a particular workload. - /// - /// This is enabled by default. - pub fn prefilter(&mut self, yes: bool) -> &mut AhoCorasickBuilder { - self.nfa_builder.prefilter(yes); - self - } - - /// Shrink the size of the transition alphabet by mapping bytes to their - /// equivalence classes. This only has an effect when the `dfa` option is - /// enabled. - /// - /// When enabled, each a DFA will use a map from all possible bytes - /// to their corresponding equivalence class. 
Each equivalence class - /// represents a set of bytes that does not discriminate between a match - /// and a non-match in the DFA. For example, the patterns `bar` and `baz` - /// have at least five equivalence classes: singleton sets of `b`, `a`, `r` - /// and `z`, and a final set that contains every other byte. - /// - /// The advantage of this map is that the size of the transition table can - /// be reduced drastically from `#states * 256 * sizeof(id)` to - /// `#states * k * sizeof(id)` where `k` is the number of equivalence - /// classes. As a result, total space usage can decrease substantially. - /// Moreover, since a smaller alphabet is used, compilation becomes faster - /// as well. - /// - /// The disadvantage of this map is that every byte searched must be - /// passed through this map before it can be used to determine the next - /// transition. This has a small match time performance cost. However, if - /// the DFA is otherwise very large without byte classes, then using byte - /// classes can greatly improve memory locality and thus lead to better - /// overall performance. - /// - /// This option is enabled by default. - #[deprecated( - since = "0.7.16", - note = "not carrying its weight, will be always enabled, see: https://github.com/BurntSushi/aho-corasick/issues/57" - )] - pub fn byte_classes(&mut self, yes: bool) -> &mut AhoCorasickBuilder { - self.dfa_builder.byte_classes(yes); - self - } - - /// Premultiply state identifiers in the transition table. This only has - /// an effect when the `dfa` option is enabled. - /// - /// When enabled, state identifiers are premultiplied to point to their - /// corresponding row in the transition table. That is, given the `i`th - /// state, its corresponding premultiplied identifier is `i * k` where `k` - /// is the alphabet size of the automaton. (The alphabet size is at most - /// 256, but is in practice smaller if byte classes is enabled.) 
- /// - /// When state identifiers are not premultiplied, then the identifier of - /// the `i`th state is `i`. - /// - /// The advantage of premultiplying state identifiers is that is saves a - /// multiplication instruction per byte when searching with a DFA. This has - /// been observed to lead to a 20% performance benefit in micro-benchmarks. - /// - /// The primary disadvantage of premultiplying state identifiers is - /// that they require a larger integer size to represent. For example, - /// if the DFA has 200 states, then its premultiplied form requires 16 - /// bits to represent every possible state identifier, where as its - /// non-premultiplied form only requires 8 bits. - /// - /// This option is enabled by default. - #[deprecated( - since = "0.7.16", - note = "not carrying its weight, will be always enabled, see: https://github.com/BurntSushi/aho-corasick/issues/57" - )] - pub fn premultiply(&mut self, yes: bool) -> &mut AhoCorasickBuilder { - self.dfa_builder.premultiply(yes); - self - } -} - -/// A knob for controlling the match semantics of an Aho-Corasick automaton. -/// -/// There are two generally different ways that Aho-Corasick automatons can -/// report matches. The first way is the "standard" approach that results from -/// implementing most textbook explanations of Aho-Corasick. The second way is -/// to report only the leftmost non-overlapping matches. The leftmost approach -/// is in turn split into two different ways of resolving ambiguous matches: -/// leftmost-first and leftmost-longest. -/// -/// The `Standard` match kind is the default and is the only one that supports -/// overlapping matches and stream searching. (Trying to find overlapping -/// or streaming matches using leftmost match semantics will result in a -/// panic.) The `Standard` match kind will report matches as they are seen. -/// When searching for overlapping matches, then all possible matches are -/// reported. 
When searching for non-overlapping matches, the first match seen -/// is reported. For example, for non-overlapping matches, given the patterns -/// `abcd` and `b` and the subject string `abcdef`, only a match for `b` is -/// reported since it is detected first. The `abcd` match is never reported -/// since it overlaps with the `b` match. -/// -/// In contrast, the leftmost match kind always prefers the leftmost match -/// among all possible matches. Given the same example as above with `abcd` and -/// `b` as patterns and `abcdef` as the subject string, the leftmost match is -/// `abcd` since it begins before the `b` match, even though the `b` match is -/// detected before the `abcd` match. In this case, the `b` match is not -/// reported at all since it overlaps with the `abcd` match. -/// -/// The difference between leftmost-first and leftmost-longest is in how they -/// resolve ambiguous matches when there are multiple leftmost matches to -/// choose from. Leftmost-first always chooses the pattern that was provided -/// earliest, where as leftmost-longest always chooses the longest matching -/// pattern. For example, given the patterns `a` and `ab` and the subject -/// string `ab`, the leftmost-first match is `a` but the leftmost-longest match -/// is `ab`. Conversely, if the patterns were given in reverse order, i.e., -/// `ab` and `a`, then both the leftmost-first and leftmost-longest matches -/// would be `ab`. Stated differently, the leftmost-first match depends on the -/// order in which the patterns were given to the Aho-Corasick automaton. -/// Because of that, when leftmost-first matching is used, if a pattern `A` -/// that appears before a pattern `B` is a prefix of `B`, then it is impossible -/// to ever observe a match of `B`. -/// -/// If you're not sure which match kind to pick, then stick with the standard -/// kind, which is the default. In particular, if you need overlapping or -/// streaming matches, then you _must_ use the standard kind. 
The leftmost -/// kinds are useful in specific circumstances. For example, leftmost-first can -/// be very useful as a way to implement match priority based on the order of -/// patterns given and leftmost-longest can be useful for dictionary searching -/// such that only the longest matching words are reported. -/// -/// # Relationship with regular expression alternations -/// -/// Understanding match semantics can be a little tricky, and one easy way -/// to conceptualize non-overlapping matches from an Aho-Corasick automaton -/// is to think about them as a simple alternation of literals in a regular -/// expression. For example, let's say we wanted to match the strings -/// `Sam` and `Samwise`, which would turn into the regex `Sam|Samwise`. It -/// turns out that regular expression engines have two different ways of -/// matching this alternation. The first way, leftmost-longest, is commonly -/// found in POSIX compatible implementations of regular expressions (such as -/// `grep`). The second way, leftmost-first, is commonly found in backtracking -/// implementations such as Perl. (Some regex engines, such as RE2 and Rust's -/// regex engine do not use backtracking, but still implement leftmost-first -/// semantics in an effort to match the behavior of dominant backtracking -/// regex engines such as those found in Perl, Ruby, Python, Javascript and -/// PHP.) -/// -/// That is, when matching `Sam|Samwise` against `Samwise`, a POSIX regex -/// will match `Samwise` because it is the longest possible match, but a -/// Perl-like regex will match `Sam` since it appears earlier in the -/// alternation. Indeed, the regex `Sam|Samwise` in a Perl-like regex engine -/// will never match `Samwise` since `Sam` will always have higher priority. -/// Conversely, matching the regex `Samwise|Sam` against `Samwise` will lead to -/// a match of `Samwise` in both POSIX and Perl-like regexes since `Samwise` is -/// still longest match, but it also appears earlier than `Sam`. 
-/// -/// The "standard" match semantics of Aho-Corasick generally don't correspond -/// to the match semantics of any large group of regex implementations, so -/// there's no direct analogy that can be made here. Standard match semantics -/// are generally useful for overlapping matches, or if you just want to see -/// matches as they are detected. -/// -/// The main conclusion to draw from this section is that the match semantics -/// can be tweaked to precisely match either Perl-like regex alternations or -/// POSIX regex alternations. -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub enum MatchKind { - /// Use standard match semantics, which support overlapping matches. When - /// used with non-overlapping matches, matches are reported as they are - /// seen. - Standard, - /// Use leftmost-first match semantics, which reports leftmost matches. - /// When there are multiple possible leftmost matches, the match - /// corresponding to the pattern that appeared earlier when constructing - /// the automaton is reported. - /// - /// This does **not** support overlapping matches or stream searching. If - /// this match kind is used, attempting to find overlapping matches or - /// stream matches will panic. - LeftmostFirst, - /// Use leftmost-longest match semantics, which reports leftmost matches. - /// When there are multiple possible leftmost matches, the longest match - /// is chosen. - /// - /// This does **not** support overlapping matches or stream searching. If - /// this match kind is used, attempting to find overlapping matches or - /// stream matches will panic. - LeftmostLongest, - /// Hints that destructuring should not be exhaustive. - /// - /// This enum may grow additional variants, so this makes sure clients - /// don't count on exhaustive matching. (Otherwise, adding a new variant - /// could break existing code.) - #[doc(hidden)] - __Nonexhaustive, -} - -/// The default match kind is `MatchKind::Standard`. 
-impl Default for MatchKind { - fn default() -> MatchKind { - MatchKind::Standard - } -} - -impl MatchKind { - fn supports_overlapping(&self) -> bool { - self.is_standard() - } - - fn supports_stream(&self) -> bool { - // TODO: It may be possible to support this. It's hard. - // - // See: https://github.com/rust-lang/regex/issues/425#issuecomment-471367838 - self.is_standard() - } - - pub(crate) fn is_standard(&self) -> bool { - *self == MatchKind::Standard - } - - pub(crate) fn is_leftmost(&self) -> bool { - *self == MatchKind::LeftmostFirst - || *self == MatchKind::LeftmostLongest - } - - pub(crate) fn is_leftmost_first(&self) -> bool { - *self == MatchKind::LeftmostFirst - } - - /// Convert this match kind into a packed match kind. If this match kind - /// corresponds to standard semantics, then this returns None, since - /// packed searching does not support standard semantics. - pub(crate) fn as_packed(&self) -> Option { - match *self { - MatchKind::Standard => None, - MatchKind::LeftmostFirst => Some(packed::MatchKind::LeftmostFirst), - MatchKind::LeftmostLongest => { - Some(packed::MatchKind::LeftmostLongest) - } - MatchKind::__Nonexhaustive => unreachable!(), - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn oibits() { - use std::panic::{RefUnwindSafe, UnwindSafe}; - - fn assert_send() {} - fn assert_sync() {} - fn assert_unwind_safe() {} - - assert_send::(); - assert_sync::(); - assert_unwind_safe::(); - assert_send::(); - assert_sync::(); - assert_unwind_safe::(); - } -} diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/automaton.rs clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/automaton.rs --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/automaton.rs 2023-02-13 06:00:43.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/automaton.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,573 +0,0 @@ -use crate::ahocorasick::MatchKind; -use 
crate::prefilter::{self, Candidate, Prefilter, PrefilterState}; -use crate::state_id::{dead_id, fail_id, StateID}; -use crate::Match; - -// NOTE: This trait essentially started as a copy of the same trait from from -// regex-automata, with some wording changed since we use this trait for -// NFAs in addition to DFAs in this crate. Additionally, we do not export -// this trait. It's only used internally to reduce code duplication. The -// regex-automata crate needs to expose it because its Regex type is generic -// over implementations of this trait. In this crate, we encapsulate everything -// behind the AhoCorasick type. -// -// This trait is a bit of a mess, but it's not quite clear how to fix it. -// Basically, there are several competing concerns: -// -// * We need performance, so everything effectively needs to get monomorphized. -// * There are several variations on searching Aho-Corasick automatons: -// overlapping, standard and leftmost. Overlapping and standard are somewhat -// combined together below, but there is no real way to combine standard with -// leftmost. Namely, leftmost requires continuing a search even after a match -// is found, in order to correctly disambiguate a match. -// * On top of that, *sometimes* callers want to know which state the automaton -// is in after searching. This is principally useful for overlapping and -// stream searches. However, when callers don't care about this, we really -// do not want to be forced to compute it, since it sometimes requires extra -// work. Thus, there are effectively two copies of leftmost searching: one -// for tracking the state ID and one that doesn't. We should ideally do the -// same for standard searching, but my sanity stopped me. - -// SAFETY RATIONALE: Previously, the code below went to some length to remove -// all bounds checks. This generally produced tighter assembly and lead to -// 20-50% improvements in micro-benchmarks on corpora made up of random -// characters. 
This somewhat makes sense, since the branch predictor is going -// to be at its worse on random text. -// -// However, using the aho-corasick-debug tool and manually benchmarking -// different inputs, the code *with* bounds checks actually wound up being -// slightly faster: -// -// $ cat input -// Sherlock Holmes -// John Watson -// Professor Moriarty -// Irene Adler -// Mary Watson -// -// $ aho-corasick-debug-safe \ -// input OpenSubtitles2018.raw.sample.en --kind leftmost-first --dfa -// pattern read time: 32.824µs -// automaton build time: 444.687µs -// automaton heap usage: 72392 bytes -// match count: 639 -// count time: 1.809961702s -// -// $ aho-corasick-debug-master \ -// input OpenSubtitles2018.raw.sample.en --kind leftmost-first --dfa -// pattern read time: 31.425µs -// automaton build time: 317.434µs -// automaton heap usage: 72392 bytes -// match count: 639 -// count time: 2.059157705s -// -// I was able to reproduce this result on two different machines (an i5 and -// an i7). Therefore, we go the route of safe code for now. - -/// A trait describing the interface of an Aho-Corasick finite state machine. -/// -/// Every automaton has exactly one fail state, one dead state and exactly one -/// start state. Generally, these correspond to the first, second and third -/// states, respectively. The dead state is always treated as a sentinel. That -/// is, no correct Aho-Corasick automaton will ever transition into the fail -/// state. The dead state, however, can be transitioned into, but only when -/// leftmost-first or leftmost-longest match semantics are enabled and only -/// when at least one match has been observed. -/// -/// Every automaton also has one or more match states, such that -/// `Automaton::is_match_state(id)` returns `true` if and only if `id` -/// corresponds to a match state. -pub trait Automaton { - /// The representation used for state identifiers in this automaton. 
- /// - /// Typically, this is one of `u8`, `u16`, `u32`, `u64` or `usize`. - type ID: StateID; - - /// The type of matching that should be done. - fn match_kind(&self) -> &MatchKind; - - /// Returns true if and only if this automaton uses anchored searches. - fn anchored(&self) -> bool; - - /// An optional prefilter for quickly skipping to the next candidate match. - /// A prefilter must report at least every match, although it may report - /// positions that do not correspond to a match. That is, it must not allow - /// false negatives, but can allow false positives. - /// - /// Currently, a prefilter only runs when the automaton is in the start - /// state. That is, the position reported by a prefilter should always - /// correspond to the start of a potential match. - fn prefilter(&self) -> Option<&dyn Prefilter>; - - /// Return the identifier of this automaton's start state. - fn start_state(&self) -> Self::ID; - - /// Returns true if and only if the given state identifier refers to a - /// valid state. - fn is_valid(&self, id: Self::ID) -> bool; - - /// Returns true if and only if the given identifier corresponds to a match - /// state. - /// - /// The state ID given must be valid, or else implementors may panic. - fn is_match_state(&self, id: Self::ID) -> bool; - - /// Returns true if and only if the given identifier corresponds to a state - /// that is either the dead state or a match state. - /// - /// Depending on the implementation of the automaton, this routine can - /// be used to save a branch in the core matching loop. Nevertheless, - /// `is_match_state(id) || id == dead_id()` is always a valid - /// implementation. Indeed, this is the default implementation. - /// - /// The state ID given must be valid, or else implementors may panic. - fn is_match_or_dead_state(&self, id: Self::ID) -> bool { - id == dead_id() || self.is_match_state(id) - } - - /// If the given state is a match state, return the match corresponding - /// to the given match index. 
`end` must be the ending position of the - /// detected match. If no match exists or if `match_index` exceeds the - /// number of matches in this state, then `None` is returned. - /// - /// The state ID given must be valid, or else implementors may panic. - /// - /// If the given state ID is correct and if the `match_index` is less than - /// the number of matches for that state, then this is guaranteed to return - /// a match. - fn get_match( - &self, - id: Self::ID, - match_index: usize, - end: usize, - ) -> Option; - - /// Returns the number of matches for the given state. If the given state - /// is not a match state, then this returns 0. - /// - /// The state ID given must be valid, or else implementors must panic. - fn match_count(&self, id: Self::ID) -> usize; - - /// Given the current state that this automaton is in and the next input - /// byte, this method returns the identifier of the next state. The - /// identifier returned must always be valid and may never correspond to - /// the fail state. The returned identifier may, however, point to the - /// dead state. - /// - /// This is not safe so that implementors may look up the next state - /// without memory safety checks such as bounds checks. As such, callers - /// must ensure that the given identifier corresponds to a valid automaton - /// state. Implementors must, in turn, ensure that this routine is safe for - /// all valid state identifiers and for all possible `u8` values. - fn next_state(&self, current: Self::ID, input: u8) -> Self::ID; - - /// Like next_state, but debug_asserts that the underlying - /// implementation never returns a `fail_id()` for the next state. - fn next_state_no_fail(&self, current: Self::ID, input: u8) -> Self::ID { - let next = self.next_state(current, input); - // We should never see a transition to the failure state. 
- debug_assert!( - next != fail_id(), - "automaton should never return fail_id for next state" - ); - next - } - - /// Execute a search using standard match semantics. - /// - /// This can be used even when the automaton was constructed with leftmost - /// match semantics when you want to find the earliest possible match. This - /// can also be used as part of an overlapping search implementation. - /// - /// N.B. This does not report a match if `state_id` is given as a matching - /// state. As such, this should not be used directly. - #[inline(always)] - fn standard_find_at( - &self, - prestate: &mut PrefilterState, - haystack: &[u8], - at: usize, - state_id: &mut Self::ID, - ) -> Option { - if let Some(pre) = self.prefilter() { - self.standard_find_at_imp( - prestate, - Some(pre), - haystack, - at, - state_id, - ) - } else { - self.standard_find_at_imp(prestate, None, haystack, at, state_id) - } - } - - // It's important for this to always be inlined. Namely, its only caller - // is standard_find_at, and the inlining should remove the case analysis - // for prefilter scanning when there is no prefilter available. - #[inline(always)] - fn standard_find_at_imp( - &self, - prestate: &mut PrefilterState, - prefilter: Option<&dyn Prefilter>, - haystack: &[u8], - mut at: usize, - state_id: &mut Self::ID, - ) -> Option { - while at < haystack.len() { - if let Some(pre) = prefilter { - if prestate.is_effective(at) && *state_id == self.start_state() - { - let c = prefilter::next(prestate, pre, haystack, at) - .into_option(); - match c { - None => return None, - Some(i) => { - at = i; - } - } - } - } - // CORRECTNESS: next_state is correct for all possible u8 values, - // so the only thing we're concerned about is the validity of - // `state_id`. `state_id` either comes from the caller (in which - // case, we assume it is correct), or it comes from the return - // value of next_state, which is guaranteed to be correct. 
- *state_id = self.next_state_no_fail(*state_id, haystack[at]); - at += 1; - // This routine always quits immediately after seeing a - // match, and since dead states can only come after seeing - // a match, seeing a dead state here is impossible. (Unless - // we have an anchored automaton, in which case, dead states - // are used to stop a search.) - debug_assert!( - *state_id != dead_id() || self.anchored(), - "standard find should never see a dead state" - ); - - if self.is_match_or_dead_state(*state_id) { - return if *state_id == dead_id() { - None - } else { - self.get_match(*state_id, 0, at) - }; - } - } - None - } - - /// Execute a search using leftmost (either first or longest) match - /// semantics. - /// - /// The principle difference between searching with standard semantics and - /// searching with leftmost semantics is that leftmost searching will - /// continue searching even after a match has been found. Once a match - /// is found, the search does not stop until either the haystack has been - /// exhausted or a dead state is observed in the automaton. (Dead states - /// only exist in automatons constructed with leftmost semantics.) That is, - /// we rely on the construction of the automaton to tell us when to quit. - #[inline(never)] - fn leftmost_find_at( - &self, - prestate: &mut PrefilterState, - haystack: &[u8], - at: usize, - state_id: &mut Self::ID, - ) -> Option { - if let Some(pre) = self.prefilter() { - self.leftmost_find_at_imp( - prestate, - Some(pre), - haystack, - at, - state_id, - ) - } else { - self.leftmost_find_at_imp(prestate, None, haystack, at, state_id) - } - } - - // It's important for this to always be inlined. Namely, its only caller - // is leftmost_find_at, and the inlining should remove the case analysis - // for prefilter scanning when there is no prefilter available. 
- #[inline(always)] - fn leftmost_find_at_imp( - &self, - prestate: &mut PrefilterState, - prefilter: Option<&dyn Prefilter>, - haystack: &[u8], - mut at: usize, - state_id: &mut Self::ID, - ) -> Option { - debug_assert!(self.match_kind().is_leftmost()); - if self.anchored() && at > 0 && *state_id == self.start_state() { - return None; - } - let mut last_match = self.get_match(*state_id, 0, at); - while at < haystack.len() { - if let Some(pre) = prefilter { - if prestate.is_effective(at) && *state_id == self.start_state() - { - let c = prefilter::next(prestate, pre, haystack, at) - .into_option(); - match c { - None => return None, - Some(i) => { - at = i; - } - } - } - } - // CORRECTNESS: next_state is correct for all possible u8 values, - // so the only thing we're concerned about is the validity of - // `state_id`. `state_id` either comes from the caller (in which - // case, we assume it is correct), or it comes from the return - // value of next_state, which is guaranteed to be correct. - *state_id = self.next_state_no_fail(*state_id, haystack[at]); - at += 1; - if self.is_match_or_dead_state(*state_id) { - if *state_id == dead_id() { - // The only way to enter into a dead state is if a match - // has been found, so we assert as much. This is different - // from normal automata, where you might enter a dead state - // if you know a subsequent match will never be found - // (regardless of whether a match has already been found). - // For Aho-Corasick, it is built so that we can match at - // any position, so the possibility of a match always - // exists. - // - // (Unless we have an anchored automaton, in which case, - // dead states are used to stop a search.) 
- debug_assert!( - last_match.is_some() || self.anchored(), - "dead state should only be seen after match" - ); - return last_match; - } - last_match = self.get_match(*state_id, 0, at); - } - } - last_match - } - - /// This is like leftmost_find_at, but does not need to track a caller - /// provided state id. In other words, the only output of this routine is a - /// match, if one exists. - /// - /// It is regrettable that we need to effectively copy a chunk of - /// implementation twice, but when we don't need to track the state ID, we - /// can allow the prefilter to report matches immediately without having - /// to re-confirm them with the automaton. The re-confirmation step is - /// necessary in leftmost_find_at because tracing through the automaton is - /// the only way to correctly set the state ID. (Perhaps an alternative - /// would be to keep a map from pattern ID to matching state ID, but that - /// complicates the code and still doesn't permit us to defer to the - /// prefilter entirely when possible.) - /// - /// I did try a few things to avoid the code duplication here, but nothing - /// optimized as well as this approach. (In microbenchmarks, there was - /// about a 25% difference.) - #[inline(never)] - fn leftmost_find_at_no_state( - &self, - prestate: &mut PrefilterState, - haystack: &[u8], - at: usize, - ) -> Option { - if let Some(pre) = self.prefilter() { - self.leftmost_find_at_no_state_imp( - prestate, - Some(pre), - haystack, - at, - ) - } else { - self.leftmost_find_at_no_state_imp(prestate, None, haystack, at) - } - } - - // It's important for this to always be inlined. Namely, its only caller - // is leftmost_find_at_no_state, and the inlining should remove the case - // analysis for prefilter scanning when there is no prefilter available. 
- #[inline(always)] - fn leftmost_find_at_no_state_imp( - &self, - prestate: &mut PrefilterState, - prefilter: Option<&dyn Prefilter>, - haystack: &[u8], - mut at: usize, - ) -> Option { - debug_assert!(self.match_kind().is_leftmost()); - if self.anchored() && at > 0 { - return None; - } - // If our prefilter handles confirmation of matches 100% of the - // time, and since we don't need to track state IDs, we can avoid - // Aho-Corasick completely. - if let Some(pre) = prefilter { - // We should never have a prefilter during an anchored search. - debug_assert!(!self.anchored()); - if !pre.reports_false_positives() { - return match pre.next_candidate(prestate, haystack, at) { - Candidate::None => None, - Candidate::Match(m) => Some(m), - Candidate::PossibleStartOfMatch(_) => unreachable!(), - }; - } - } - - let mut state_id = self.start_state(); - let mut last_match = self.get_match(state_id, 0, at); - while at < haystack.len() { - if let Some(pre) = prefilter { - if prestate.is_effective(at) && state_id == self.start_state() - { - match prefilter::next(prestate, pre, haystack, at) { - Candidate::None => return None, - // Since we aren't tracking a state ID, we can - // quit early once we know we have a match. - Candidate::Match(m) => return Some(m), - Candidate::PossibleStartOfMatch(i) => { - at = i; - } - } - } - } - // CORRECTNESS: next_state is correct for all possible u8 values, - // so the only thing we're concerned about is the validity of - // `state_id`. `state_id` either comes from the caller (in which - // case, we assume it is correct), or it comes from the return - // value of next_state, which is guaranteed to be correct. - state_id = self.next_state_no_fail(state_id, haystack[at]); - at += 1; - if self.is_match_or_dead_state(state_id) { - if state_id == dead_id() { - // The only way to enter into a dead state is if a - // match has been found, so we assert as much. 
This - // is different from normal automata, where you might - // enter a dead state if you know a subsequent match - // will never be found (regardless of whether a match - // has already been found). For Aho-Corasick, it is - // built so that we can match at any position, so the - // possibility of a match always exists. - // - // (Unless we have an anchored automaton, in which - // case, dead states are used to stop a search.) - debug_assert!( - last_match.is_some() || self.anchored(), - "dead state should only be seen after match" - ); - return last_match; - } - last_match = self.get_match(state_id, 0, at); - } - } - last_match - } - - /// Execute an overlapping search. - /// - /// When executing an overlapping match, the previous state ID in addition - /// to the previous match index should be given. If there are more matches - /// at the given state, then the match is reported and the given index is - /// incremented. - #[inline(always)] - fn overlapping_find_at( - &self, - prestate: &mut PrefilterState, - haystack: &[u8], - at: usize, - state_id: &mut Self::ID, - match_index: &mut usize, - ) -> Option { - if self.anchored() && at > 0 && *state_id == self.start_state() { - return None; - } - - let match_count = self.match_count(*state_id); - if *match_index < match_count { - // This is guaranteed to return a match since - // match_index < match_count. - let result = self.get_match(*state_id, *match_index, at); - debug_assert!(result.is_some(), "must be a match"); - *match_index += 1; - return result; - } - - *match_index = 0; - match self.standard_find_at(prestate, haystack, at, state_id) { - None => None, - Some(m) => { - *match_index = 1; - Some(m) - } - } - } - - /// Return the earliest match found. This returns as soon as we know that - /// we have a match. As such, this does not necessarily correspond to the - /// leftmost starting match, but rather, the leftmost position at which a - /// match ends. 
- #[inline(always)] - fn earliest_find_at( - &self, - prestate: &mut PrefilterState, - haystack: &[u8], - at: usize, - state_id: &mut Self::ID, - ) -> Option { - if *state_id == self.start_state() { - if self.anchored() && at > 0 { - return None; - } - if let Some(m) = self.get_match(*state_id, 0, at) { - return Some(m); - } - } - self.standard_find_at(prestate, haystack, at, state_id) - } - - /// A convenience function for finding the next match according to the - /// match semantics of this automaton. For standard match semantics, this - /// finds the earliest match. Otherwise, the leftmost match is found. - #[inline(always)] - fn find_at( - &self, - prestate: &mut PrefilterState, - haystack: &[u8], - at: usize, - state_id: &mut Self::ID, - ) -> Option { - match *self.match_kind() { - MatchKind::Standard => { - self.earliest_find_at(prestate, haystack, at, state_id) - } - MatchKind::LeftmostFirst | MatchKind::LeftmostLongest => { - self.leftmost_find_at(prestate, haystack, at, state_id) - } - MatchKind::__Nonexhaustive => unreachable!(), - } - } - - /// Like find_at, but does not track state identifiers. This permits some - /// optimizations when a prefilter that confirms its own matches is - /// present. 
- #[inline(always)] - fn find_at_no_state( - &self, - prestate: &mut PrefilterState, - haystack: &[u8], - at: usize, - ) -> Option { - match *self.match_kind() { - MatchKind::Standard => { - let mut state = self.start_state(); - self.earliest_find_at(prestate, haystack, at, &mut state) - } - MatchKind::LeftmostFirst | MatchKind::LeftmostLongest => { - self.leftmost_find_at_no_state(prestate, haystack, at) - } - MatchKind::__Nonexhaustive => unreachable!(), - } - } -} diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/buffer.rs clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/buffer.rs --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/buffer.rs 2023-02-13 06:00:43.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/buffer.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,132 +0,0 @@ -use std::cmp; -use std::io; -use std::ptr; - -/// The default buffer capacity that we use for the stream buffer. -const DEFAULT_BUFFER_CAPACITY: usize = 8 * (1 << 10); // 8 KB - -/// A fairly simple roll buffer for supporting stream searches. -/// -/// This buffer acts as a temporary place to store a fixed amount of data when -/// reading from a stream. Its central purpose is to allow "rolling" some -/// suffix of the data to the beginning of the buffer before refilling it with -/// more data from the stream. For example, let's say we are trying to match -/// "foobar" on a stream. When we report the match, we'd like to not only -/// report the correct offsets at which the match occurs, but also the matching -/// bytes themselves. So let's say our stream is a file with the following -/// contents: `test test foobar test test`. Now assume that we happen to read -/// the aforementioned file in two chunks: `test test foo` and `bar test test`. -/// Naively, it would not be possible to report a single contiguous `foobar` -/// match, but this roll buffer allows us to do that. 
Namely, after the second -/// read, the contents of the buffer should be `st foobar test test`, where the -/// search should ultimately resume immediately after `foo`. (The prefix `st ` -/// is included because the roll buffer saves N bytes at the end of the buffer, -/// where N is the maximum possible length of a match.) -/// -/// A lot of the logic for dealing with this is unfortunately split out between -/// this roll buffer and the `StreamChunkIter`. -#[derive(Debug)] -pub struct Buffer { - /// The raw buffer contents. This has a fixed size and never increases. - buf: Vec, - /// The minimum size of the buffer, which is equivalent to the maximum - /// possible length of a match. This corresponds to the amount that we - /// roll - min: usize, - /// The end of the contents of this buffer. - end: usize, -} - -impl Buffer { - /// Create a new buffer for stream searching. The minimum buffer length - /// given should be the size of the maximum possible match length. - pub fn new(min_buffer_len: usize) -> Buffer { - let min = cmp::max(1, min_buffer_len); - // The minimum buffer amount is also the amount that we roll our - // buffer in order to support incremental searching. To this end, - // our actual capacity needs to be at least 1 byte bigger than our - // minimum amount, otherwise we won't have any overlap. In actuality, - // we want our buffer to be a bit bigger than that for performance - // reasons, so we set a lower bound of `8 * min`. - // - // TODO: It would be good to find a way to test the streaming - // implementation with the minimal buffer size. For now, we just - // uncomment out the next line and comment out the subsequent line. - // let capacity = 1 + min; - let capacity = cmp::max(min * 8, DEFAULT_BUFFER_CAPACITY); - Buffer { buf: vec![0; capacity], min, end: 0 } - } - - /// Return the contents of this buffer. - #[inline] - pub fn buffer(&self) -> &[u8] { - &self.buf[..self.end] - } - - /// Return the minimum size of the buffer. 
The only way a buffer may be - /// smaller than this is if the stream itself contains less than the - /// minimum buffer amount. - #[inline] - pub fn min_buffer_len(&self) -> usize { - self.min - } - - /// Return the total length of the contents in the buffer. - #[inline] - pub fn len(&self) -> usize { - self.end - } - - /// Return all free capacity in this buffer. - fn free_buffer(&mut self) -> &mut [u8] { - &mut self.buf[self.end..] - } - - /// Refill the contents of this buffer by reading as much as possible into - /// this buffer's free capacity. If no more bytes could be read, then this - /// returns false. Otherwise, this reads until it has filled the buffer - /// past the minimum amount. - pub fn fill(&mut self, mut rdr: R) -> io::Result { - let mut readany = false; - loop { - let readlen = rdr.read(self.free_buffer())?; - if readlen == 0 { - return Ok(readany); - } - readany = true; - self.end += readlen; - if self.len() >= self.min { - return Ok(true); - } - } - } - - /// Roll the contents of the buffer so that the suffix of this buffer is - /// moved to the front and all other contents are dropped. The size of the - /// suffix corresponds precisely to the minimum buffer length. - /// - /// This should only be called when the entire contents of this buffer have - /// been searched. - pub fn roll(&mut self) { - let roll_start = self - .end - .checked_sub(self.min) - .expect("buffer capacity should be bigger than minimum amount"); - let roll_len = self.min; - - assert!(roll_start + roll_len <= self.end); - unsafe { - // SAFETY: A buffer contains Copy data, so there's no problem - // moving it around. Safety also depends on our indices being in - // bounds, which they always should be, given the assert above. - // - // TODO: Switch to [T]::copy_within once our MSRV is high enough. 
- ptr::copy( - self.buf[roll_start..].as_ptr(), - self.buf.as_mut_ptr(), - roll_len, - ); - } - self.end = roll_len; - } -} diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/byte_frequencies.rs clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/byte_frequencies.rs --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/byte_frequencies.rs 2023-02-13 06:00:43.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/byte_frequencies.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,258 +0,0 @@ -pub const BYTE_FREQUENCIES: [u8; 256] = [ - 55, // '\x00' - 52, // '\x01' - 51, // '\x02' - 50, // '\x03' - 49, // '\x04' - 48, // '\x05' - 47, // '\x06' - 46, // '\x07' - 45, // '\x08' - 103, // '\t' - 242, // '\n' - 66, // '\x0b' - 67, // '\x0c' - 229, // '\r' - 44, // '\x0e' - 43, // '\x0f' - 42, // '\x10' - 41, // '\x11' - 40, // '\x12' - 39, // '\x13' - 38, // '\x14' - 37, // '\x15' - 36, // '\x16' - 35, // '\x17' - 34, // '\x18' - 33, // '\x19' - 56, // '\x1a' - 32, // '\x1b' - 31, // '\x1c' - 30, // '\x1d' - 29, // '\x1e' - 28, // '\x1f' - 255, // ' ' - 148, // '!' - 164, // '"' - 149, // '#' - 136, // '$' - 160, // '%' - 155, // '&' - 173, // "'" - 221, // '(' - 222, // ')' - 134, // '*' - 122, // '+' - 232, // ',' - 202, // '-' - 215, // '.' - 224, // '/' - 208, // '0' - 220, // '1' - 204, // '2' - 187, // '3' - 183, // '4' - 179, // '5' - 177, // '6' - 168, // '7' - 178, // '8' - 200, // '9' - 226, // ':' - 195, // ';' - 154, // '<' - 184, // '=' - 174, // '>' - 126, // '?' 
- 120, // '@' - 191, // 'A' - 157, // 'B' - 194, // 'C' - 170, // 'D' - 189, // 'E' - 162, // 'F' - 161, // 'G' - 150, // 'H' - 193, // 'I' - 142, // 'J' - 137, // 'K' - 171, // 'L' - 176, // 'M' - 185, // 'N' - 167, // 'O' - 186, // 'P' - 112, // 'Q' - 175, // 'R' - 192, // 'S' - 188, // 'T' - 156, // 'U' - 140, // 'V' - 143, // 'W' - 123, // 'X' - 133, // 'Y' - 128, // 'Z' - 147, // '[' - 138, // '\\' - 146, // ']' - 114, // '^' - 223, // '_' - 151, // '`' - 249, // 'a' - 216, // 'b' - 238, // 'c' - 236, // 'd' - 253, // 'e' - 227, // 'f' - 218, // 'g' - 230, // 'h' - 247, // 'i' - 135, // 'j' - 180, // 'k' - 241, // 'l' - 233, // 'm' - 246, // 'n' - 244, // 'o' - 231, // 'p' - 139, // 'q' - 245, // 'r' - 243, // 's' - 251, // 't' - 235, // 'u' - 201, // 'v' - 196, // 'w' - 240, // 'x' - 214, // 'y' - 152, // 'z' - 182, // '{' - 205, // '|' - 181, // '}' - 127, // '~' - 27, // '\x7f' - 212, // '\x80' - 211, // '\x81' - 210, // '\x82' - 213, // '\x83' - 228, // '\x84' - 197, // '\x85' - 169, // '\x86' - 159, // '\x87' - 131, // '\x88' - 172, // '\x89' - 105, // '\x8a' - 80, // '\x8b' - 98, // '\x8c' - 96, // '\x8d' - 97, // '\x8e' - 81, // '\x8f' - 207, // '\x90' - 145, // '\x91' - 116, // '\x92' - 115, // '\x93' - 144, // '\x94' - 130, // '\x95' - 153, // '\x96' - 121, // '\x97' - 107, // '\x98' - 132, // '\x99' - 109, // '\x9a' - 110, // '\x9b' - 124, // '\x9c' - 111, // '\x9d' - 82, // '\x9e' - 108, // '\x9f' - 118, // '\xa0' - 141, // '¡' - 113, // '¢' - 129, // '£' - 119, // '¤' - 125, // '¥' - 165, // '¦' - 117, // '§' - 92, // '¨' - 106, // '©' - 83, // 'ª' - 72, // '«' - 99, // '¬' - 93, // '\xad' - 65, // '®' - 79, // '¯' - 166, // '°' - 237, // '±' - 163, // '²' - 199, // '³' - 190, // '´' - 225, // 'µ' - 209, // '¶' - 203, // '·' - 198, // '¸' - 217, // '¹' - 219, // 'º' - 206, // '»' - 234, // '¼' - 248, // '½' - 158, // '¾' - 239, // '¿' - 255, // 'À' - 255, // 'Á' - 255, // 'Â' - 255, // 'Ã' - 255, // 'Ä' - 255, // 'Å' - 255, // 'Æ' - 255, // 'Ç' - 
255, // 'È' - 255, // 'É' - 255, // 'Ê' - 255, // 'Ë' - 255, // 'Ì' - 255, // 'Í' - 255, // 'Î' - 255, // 'Ï' - 255, // 'Ð' - 255, // 'Ñ' - 255, // 'Ò' - 255, // 'Ó' - 255, // 'Ô' - 255, // 'Õ' - 255, // 'Ö' - 255, // '×' - 255, // 'Ø' - 255, // 'Ù' - 255, // 'Ú' - 255, // 'Û' - 255, // 'Ü' - 255, // 'Ý' - 255, // 'Þ' - 255, // 'ß' - 255, // 'à' - 255, // 'á' - 255, // 'â' - 255, // 'ã' - 255, // 'ä' - 255, // 'å' - 255, // 'æ' - 255, // 'ç' - 255, // 'è' - 255, // 'é' - 255, // 'ê' - 255, // 'ë' - 255, // 'ì' - 255, // 'í' - 255, // 'î' - 255, // 'ï' - 255, // 'ð' - 255, // 'ñ' - 255, // 'ò' - 255, // 'ó' - 255, // 'ô' - 255, // 'õ' - 255, // 'ö' - 255, // '÷' - 255, // 'ø' - 255, // 'ù' - 255, // 'ú' - 255, // 'û' - 255, // 'ü' - 255, // 'ý' - 255, // 'þ' - 255, // 'ÿ' -]; diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/classes.rs clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/classes.rs --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/classes.rs 2023-02-13 06:00:43.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/classes.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,238 +0,0 @@ -use std::fmt; - -/// A representation of byte oriented equivalence classes. -/// -/// This is used in an FSM to reduce the size of the transition table. This can -/// have a particularly large impact not only on the total size of an FSM, but -/// also on compile times. -#[derive(Clone, Copy)] -pub struct ByteClasses([u8; 256]); - -impl ByteClasses { - /// Creates a new set of equivalence classes where all bytes are mapped to - /// the same class. - pub fn empty() -> ByteClasses { - ByteClasses([0; 256]) - } - - /// Creates a new set of equivalence classes where each byte belongs to - /// its own equivalence class. 
- pub fn singletons() -> ByteClasses { - let mut classes = ByteClasses::empty(); - for i in 0..256 { - classes.set(i as u8, i as u8); - } - classes - } - - /// Set the equivalence class for the given byte. - #[inline] - pub fn set(&mut self, byte: u8, class: u8) { - self.0[byte as usize] = class; - } - - /// Get the equivalence class for the given byte. - #[inline] - pub fn get(&self, byte: u8) -> u8 { - // SAFETY: This is safe because all dense transitions have - // exactly 256 elements, so all u8 values are valid indices. - self.0[byte as usize] - } - - /// Return the total number of elements in the alphabet represented by - /// these equivalence classes. Equivalently, this returns the total number - /// of equivalence classes. - #[inline] - pub fn alphabet_len(&self) -> usize { - self.0[255] as usize + 1 - } - - /// Returns true if and only if every byte in this class maps to its own - /// equivalence class. Equivalently, there are 256 equivalence classes - /// and each class contains exactly one byte. - #[inline] - pub fn is_singleton(&self) -> bool { - self.alphabet_len() == 256 - } - - /// Returns an iterator over a sequence of representative bytes from each - /// equivalence class. Namely, this yields exactly N items, where N is - /// equivalent to the number of equivalence classes. Each item is an - /// arbitrary byte drawn from each equivalence class. - /// - /// This is useful when one is determinizing an NFA and the NFA's alphabet - /// hasn't been converted to equivalence classes yet. Picking an arbitrary - /// byte from each equivalence class then permits a full exploration of - /// the NFA instead of using every possible byte value. - pub fn representatives(&self) -> ByteClassRepresentatives<'_> { - ByteClassRepresentatives { classes: self, byte: 0, last_class: None } - } - - /// Returns all of the bytes in the given equivalence class. - /// - /// The second element in the tuple indicates the number of elements in - /// the array. 
- fn elements(&self, equiv: u8) -> ([u8; 256], usize) { - let (mut array, mut len) = ([0; 256], 0); - for b in 0..256 { - if self.get(b as u8) == equiv { - array[len] = b as u8; - len += 1; - } - } - (array, len) - } -} - -impl fmt::Debug for ByteClasses { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - if self.is_singleton() { - write!(f, "ByteClasses({{singletons}})") - } else { - write!(f, "ByteClasses(")?; - for equiv in 0..self.alphabet_len() { - let (members, len) = self.elements(equiv as u8); - write!(f, " {} => {:?}", equiv, &members[..len])?; - } - write!(f, ")") - } - } -} - -/// An iterator over representative bytes from each equivalence class. -#[derive(Debug)] -pub struct ByteClassRepresentatives<'a> { - classes: &'a ByteClasses, - byte: usize, - last_class: Option, -} - -impl<'a> Iterator for ByteClassRepresentatives<'a> { - type Item = u8; - - fn next(&mut self) -> Option { - while self.byte < 256 { - let byte = self.byte as u8; - let class = self.classes.get(byte); - self.byte += 1; - - if self.last_class != Some(class) { - self.last_class = Some(class); - return Some(byte); - } - } - None - } -} - -/// A byte class builder keeps track of an *approximation* of equivalence -/// classes of bytes during NFA construction. That is, every byte in an -/// equivalence class cannot discriminate between a match and a non-match. -/// -/// For example, in the literals `abc` and `xyz`, the bytes [\x00-`], [d-w] -/// and [{-\xFF] never discriminate between a match and a non-match, precisely -/// because they never occur in the literals anywhere. -/// -/// Note though that this does not necessarily compute the minimal set of -/// equivalence classes. For example, in the literals above, the byte ranges -/// [\x00-`], [d-w] and [{-\xFF] are all treated as distinct equivalence -/// classes even though they could be treated a single class. The reason for -/// this is implementation complexity. 
In the future, we should endeavor to -/// compute the minimal equivalence classes since they can have a rather large -/// impact on the size of the DFA. -/// -/// The representation here is 256 booleans, all initially set to false. Each -/// boolean maps to its corresponding byte based on position. A `true` value -/// indicates the end of an equivalence class, where its corresponding byte -/// and all of the bytes corresponding to all previous contiguous `false` -/// values are in the same equivalence class. -/// -/// This particular representation only permits contiguous ranges of bytes to -/// be in the same equivalence class, which means that we can never discover -/// the true minimal set of equivalence classes. -#[derive(Debug)] -pub struct ByteClassBuilder(Vec); - -impl ByteClassBuilder { - /// Create a new builder of byte classes where all bytes are part of the - /// same equivalence class. - pub fn new() -> ByteClassBuilder { - ByteClassBuilder(vec![false; 256]) - } - - /// Indicate the the range of byte given (inclusive) can discriminate a - /// match between it and all other bytes outside of the range. - pub fn set_range(&mut self, start: u8, end: u8) { - debug_assert!(start <= end); - if start > 0 { - self.0[start as usize - 1] = true; - } - self.0[end as usize] = true; - } - - /// Build byte classes that map all byte values to their corresponding - /// equivalence class. The last mapping indicates the largest equivalence - /// class identifier (which is never bigger than 255). 
- pub fn build(&self) -> ByteClasses { - let mut classes = ByteClasses::empty(); - let mut class = 0u8; - let mut i = 0; - loop { - classes.set(i as u8, class as u8); - if i >= 255 { - break; - } - if self.0[i] { - class = class.checked_add(1).unwrap(); - } - i += 1; - } - classes - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn byte_classes() { - let mut set = ByteClassBuilder::new(); - set.set_range(b'a', b'z'); - - let classes = set.build(); - assert_eq!(classes.get(0), 0); - assert_eq!(classes.get(1), 0); - assert_eq!(classes.get(2), 0); - assert_eq!(classes.get(b'a' - 1), 0); - assert_eq!(classes.get(b'a'), 1); - assert_eq!(classes.get(b'm'), 1); - assert_eq!(classes.get(b'z'), 1); - assert_eq!(classes.get(b'z' + 1), 2); - assert_eq!(classes.get(254), 2); - assert_eq!(classes.get(255), 2); - - let mut set = ByteClassBuilder::new(); - set.set_range(0, 2); - set.set_range(4, 6); - let classes = set.build(); - assert_eq!(classes.get(0), 0); - assert_eq!(classes.get(1), 0); - assert_eq!(classes.get(2), 0); - assert_eq!(classes.get(3), 1); - assert_eq!(classes.get(4), 2); - assert_eq!(classes.get(5), 2); - assert_eq!(classes.get(6), 2); - assert_eq!(classes.get(7), 3); - assert_eq!(classes.get(255), 3); - } - - #[test] - fn full_byte_classes() { - let mut set = ByteClassBuilder::new(); - for i in 0..256u16 { - set.set_range(i as u8, i as u8); - } - assert_eq!(set.build().alphabet_len(), 256); - } -} diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/dfa.rs clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/dfa.rs --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/dfa.rs 2023-02-13 06:00:43.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/dfa.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,713 +0,0 @@ -use std::mem::size_of; - -use crate::ahocorasick::MatchKind; -use crate::automaton::Automaton; -use crate::classes::ByteClasses; -use crate::error::Result; -use 
crate::nfa::{PatternID, PatternLength, NFA}; -use crate::prefilter::{Prefilter, PrefilterObj, PrefilterState}; -use crate::state_id::{dead_id, fail_id, premultiply_overflow_error, StateID}; -use crate::Match; - -#[derive(Clone, Debug)] -pub enum DFA { - Standard(Standard), - ByteClass(ByteClass), - Premultiplied(Premultiplied), - PremultipliedByteClass(PremultipliedByteClass), -} - -impl DFA { - fn repr(&self) -> &Repr { - match *self { - DFA::Standard(ref dfa) => dfa.repr(), - DFA::ByteClass(ref dfa) => dfa.repr(), - DFA::Premultiplied(ref dfa) => dfa.repr(), - DFA::PremultipliedByteClass(ref dfa) => dfa.repr(), - } - } - - pub fn match_kind(&self) -> &MatchKind { - &self.repr().match_kind - } - - pub fn heap_bytes(&self) -> usize { - self.repr().heap_bytes - } - - pub fn max_pattern_len(&self) -> usize { - self.repr().max_pattern_len - } - - pub fn pattern_count(&self) -> usize { - self.repr().pattern_count - } - - pub fn prefilter(&self) -> Option<&dyn Prefilter> { - self.repr().prefilter.as_ref().map(|p| p.as_ref()) - } - - pub fn start_state(&self) -> S { - self.repr().start_id - } - - #[inline(always)] - pub fn overlapping_find_at( - &self, - prestate: &mut PrefilterState, - haystack: &[u8], - at: usize, - state_id: &mut S, - match_index: &mut usize, - ) -> Option { - match *self { - DFA::Standard(ref dfa) => dfa.overlapping_find_at( - prestate, - haystack, - at, - state_id, - match_index, - ), - DFA::ByteClass(ref dfa) => dfa.overlapping_find_at( - prestate, - haystack, - at, - state_id, - match_index, - ), - DFA::Premultiplied(ref dfa) => dfa.overlapping_find_at( - prestate, - haystack, - at, - state_id, - match_index, - ), - DFA::PremultipliedByteClass(ref dfa) => dfa.overlapping_find_at( - prestate, - haystack, - at, - state_id, - match_index, - ), - } - } - - #[inline(always)] - pub fn earliest_find_at( - &self, - prestate: &mut PrefilterState, - haystack: &[u8], - at: usize, - state_id: &mut S, - ) -> Option { - match *self { - DFA::Standard(ref dfa) => 
{ - dfa.earliest_find_at(prestate, haystack, at, state_id) - } - DFA::ByteClass(ref dfa) => { - dfa.earliest_find_at(prestate, haystack, at, state_id) - } - DFA::Premultiplied(ref dfa) => { - dfa.earliest_find_at(prestate, haystack, at, state_id) - } - DFA::PremultipliedByteClass(ref dfa) => { - dfa.earliest_find_at(prestate, haystack, at, state_id) - } - } - } - - #[inline(always)] - pub fn find_at_no_state( - &self, - prestate: &mut PrefilterState, - haystack: &[u8], - at: usize, - ) -> Option { - match *self { - DFA::Standard(ref dfa) => { - dfa.find_at_no_state(prestate, haystack, at) - } - DFA::ByteClass(ref dfa) => { - dfa.find_at_no_state(prestate, haystack, at) - } - DFA::Premultiplied(ref dfa) => { - dfa.find_at_no_state(prestate, haystack, at) - } - DFA::PremultipliedByteClass(ref dfa) => { - dfa.find_at_no_state(prestate, haystack, at) - } - } - } -} - -#[derive(Clone, Debug)] -pub struct Standard(Repr); - -impl Standard { - fn repr(&self) -> &Repr { - &self.0 - } -} - -impl Automaton for Standard { - type ID = S; - - fn match_kind(&self) -> &MatchKind { - &self.repr().match_kind - } - - fn anchored(&self) -> bool { - self.repr().anchored - } - - fn prefilter(&self) -> Option<&dyn Prefilter> { - self.repr().prefilter.as_ref().map(|p| p.as_ref()) - } - - fn start_state(&self) -> S { - self.repr().start_id - } - - fn is_valid(&self, id: S) -> bool { - id.to_usize() < self.repr().state_count - } - - fn is_match_state(&self, id: S) -> bool { - self.repr().is_match_state(id) - } - - fn is_match_or_dead_state(&self, id: S) -> bool { - self.repr().is_match_or_dead_state(id) - } - - fn get_match( - &self, - id: S, - match_index: usize, - end: usize, - ) -> Option { - self.repr().get_match(id, match_index, end) - } - - fn match_count(&self, id: S) -> usize { - self.repr().match_count(id) - } - - fn next_state(&self, current: S, input: u8) -> S { - let o = current.to_usize() * 256 + input as usize; - self.repr().trans[o] - } -} - -#[derive(Clone, Debug)] -pub 
struct ByteClass(Repr); - -impl ByteClass { - fn repr(&self) -> &Repr { - &self.0 - } -} - -impl Automaton for ByteClass { - type ID = S; - - fn match_kind(&self) -> &MatchKind { - &self.repr().match_kind - } - - fn anchored(&self) -> bool { - self.repr().anchored - } - - fn prefilter(&self) -> Option<&dyn Prefilter> { - self.repr().prefilter.as_ref().map(|p| p.as_ref()) - } - - fn start_state(&self) -> S { - self.repr().start_id - } - - fn is_valid(&self, id: S) -> bool { - id.to_usize() < self.repr().state_count - } - - fn is_match_state(&self, id: S) -> bool { - self.repr().is_match_state(id) - } - - fn is_match_or_dead_state(&self, id: S) -> bool { - self.repr().is_match_or_dead_state(id) - } - - fn get_match( - &self, - id: S, - match_index: usize, - end: usize, - ) -> Option { - self.repr().get_match(id, match_index, end) - } - - fn match_count(&self, id: S) -> usize { - self.repr().match_count(id) - } - - fn next_state(&self, current: S, input: u8) -> S { - let alphabet_len = self.repr().byte_classes.alphabet_len(); - let input = self.repr().byte_classes.get(input); - let o = current.to_usize() * alphabet_len + input as usize; - self.repr().trans[o] - } -} - -#[derive(Clone, Debug)] -pub struct Premultiplied(Repr); - -impl Premultiplied { - fn repr(&self) -> &Repr { - &self.0 - } -} - -impl Automaton for Premultiplied { - type ID = S; - - fn match_kind(&self) -> &MatchKind { - &self.repr().match_kind - } - - fn anchored(&self) -> bool { - self.repr().anchored - } - - fn prefilter(&self) -> Option<&dyn Prefilter> { - self.repr().prefilter.as_ref().map(|p| p.as_ref()) - } - - fn start_state(&self) -> S { - self.repr().start_id - } - - fn is_valid(&self, id: S) -> bool { - (id.to_usize() / 256) < self.repr().state_count - } - - fn is_match_state(&self, id: S) -> bool { - self.repr().is_match_state(id) - } - - fn is_match_or_dead_state(&self, id: S) -> bool { - self.repr().is_match_or_dead_state(id) - } - - fn get_match( - &self, - id: S, - match_index: usize, - 
end: usize, - ) -> Option { - if id > self.repr().max_match { - return None; - } - self.repr() - .matches - .get(id.to_usize() / 256) - .and_then(|m| m.get(match_index)) - .map(|&(id, len)| Match { pattern: id, len, end }) - } - - fn match_count(&self, id: S) -> usize { - let o = id.to_usize() / 256; - self.repr().matches[o].len() - } - - fn next_state(&self, current: S, input: u8) -> S { - let o = current.to_usize() + input as usize; - self.repr().trans[o] - } -} - -#[derive(Clone, Debug)] -pub struct PremultipliedByteClass(Repr); - -impl PremultipliedByteClass { - fn repr(&self) -> &Repr { - &self.0 - } -} - -impl Automaton for PremultipliedByteClass { - type ID = S; - - fn match_kind(&self) -> &MatchKind { - &self.repr().match_kind - } - - fn anchored(&self) -> bool { - self.repr().anchored - } - - fn prefilter(&self) -> Option<&dyn Prefilter> { - self.repr().prefilter.as_ref().map(|p| p.as_ref()) - } - - fn start_state(&self) -> S { - self.repr().start_id - } - - fn is_valid(&self, id: S) -> bool { - (id.to_usize() / self.repr().alphabet_len()) < self.repr().state_count - } - - fn is_match_state(&self, id: S) -> bool { - self.repr().is_match_state(id) - } - - fn is_match_or_dead_state(&self, id: S) -> bool { - self.repr().is_match_or_dead_state(id) - } - - fn get_match( - &self, - id: S, - match_index: usize, - end: usize, - ) -> Option { - if id > self.repr().max_match { - return None; - } - self.repr() - .matches - .get(id.to_usize() / self.repr().alphabet_len()) - .and_then(|m| m.get(match_index)) - .map(|&(id, len)| Match { pattern: id, len, end }) - } - - fn match_count(&self, id: S) -> usize { - let o = id.to_usize() / self.repr().alphabet_len(); - self.repr().matches[o].len() - } - - fn next_state(&self, current: S, input: u8) -> S { - let input = self.repr().byte_classes.get(input); - let o = current.to_usize() + input as usize; - self.repr().trans[o] - } -} - -#[derive(Clone, Debug)] -pub struct Repr { - match_kind: MatchKind, - anchored: bool, - 
premultiplied: bool, - start_id: S, - /// The length, in bytes, of the longest pattern in this automaton. This - /// information is useful for keeping correct buffer sizes when searching - /// on streams. - max_pattern_len: usize, - /// The total number of patterns added to this automaton. This includes - /// patterns that may never match. - pattern_count: usize, - state_count: usize, - max_match: S, - /// The number of bytes of heap used by this NFA's transition table. - heap_bytes: usize, - /// A prefilter for quickly detecting candidate matchs, if pertinent. - prefilter: Option, - byte_classes: ByteClasses, - trans: Vec, - matches: Vec>, -} - -impl Repr { - /// Returns the total alphabet size for this DFA. - /// - /// If byte classes are enabled, then this corresponds to the number of - /// equivalence classes. If they are disabled, then this is always 256. - fn alphabet_len(&self) -> usize { - self.byte_classes.alphabet_len() - } - - /// Returns true only if the given state is a match state. - fn is_match_state(&self, id: S) -> bool { - id <= self.max_match && id > dead_id() - } - - /// Returns true only if the given state is either a dead state or a match - /// state. - fn is_match_or_dead_state(&self, id: S) -> bool { - id <= self.max_match - } - - /// Get the ith match for the given state, where the end position of a - /// match was found at `end`. - /// - /// # Panics - /// - /// The caller must ensure that the given state identifier is valid, - /// otherwise this may panic. The `match_index` need not be valid. That is, - /// if the given state has no matches then this returns `None`. - fn get_match( - &self, - id: S, - match_index: usize, - end: usize, - ) -> Option { - if id > self.max_match { - return None; - } - self.matches - .get(id.to_usize()) - .and_then(|m| m.get(match_index)) - .map(|&(id, len)| Match { pattern: id, len, end }) - } - - /// Return the total number of matches for the given state. 
- /// - /// # Panics - /// - /// The caller must ensure that the given identifier is valid, or else - /// this panics. - fn match_count(&self, id: S) -> usize { - self.matches[id.to_usize()].len() - } - - /// Get the next state given `from` as the current state and `byte` as the - /// current input byte. - fn next_state(&self, from: S, byte: u8) -> S { - let alphabet_len = self.alphabet_len(); - let byte = self.byte_classes.get(byte); - self.trans[from.to_usize() * alphabet_len + byte as usize] - } - - /// Set the `byte` transition for the `from` state to point to `to`. - fn set_next_state(&mut self, from: S, byte: u8, to: S) { - let alphabet_len = self.alphabet_len(); - let byte = self.byte_classes.get(byte); - self.trans[from.to_usize() * alphabet_len + byte as usize] = to; - } - - /// Swap the given states in place. - fn swap_states(&mut self, id1: S, id2: S) { - assert!(!self.premultiplied, "can't swap states in premultiplied DFA"); - - let o1 = id1.to_usize() * self.alphabet_len(); - let o2 = id2.to_usize() * self.alphabet_len(); - for b in 0..self.alphabet_len() { - self.trans.swap(o1 + b, o2 + b); - } - self.matches.swap(id1.to_usize(), id2.to_usize()); - } - - /// This routine shuffles all match states in this DFA to the beginning - /// of the DFA such that every non-match state appears after every match - /// state. (With one exception: the special fail and dead states remain as - /// the first two states.) - /// - /// The purpose of doing this shuffling is to avoid an extra conditional - /// in the search loop, and in particular, detecting whether a state is a - /// match or not does not need to access any memory. - /// - /// This updates `self.max_match` to point to the last matching state as - /// well as `self.start` if the starting state was moved. 
- fn shuffle_match_states(&mut self) { - assert!( - !self.premultiplied, - "cannot shuffle match states of premultiplied DFA" - ); - - if self.state_count <= 1 { - return; - } - - let mut first_non_match = self.start_id.to_usize(); - while first_non_match < self.state_count - && self.matches[first_non_match].len() > 0 - { - first_non_match += 1; - } - - let mut swaps: Vec = vec![fail_id(); self.state_count]; - let mut cur = self.state_count - 1; - while cur > first_non_match { - if self.matches[cur].len() > 0 { - self.swap_states( - S::from_usize(cur), - S::from_usize(first_non_match), - ); - swaps[cur] = S::from_usize(first_non_match); - swaps[first_non_match] = S::from_usize(cur); - - first_non_match += 1; - while first_non_match < cur - && self.matches[first_non_match].len() > 0 - { - first_non_match += 1; - } - } - cur -= 1; - } - for id in (0..self.state_count).map(S::from_usize) { - let alphabet_len = self.alphabet_len(); - let offset = id.to_usize() * alphabet_len; - for next in &mut self.trans[offset..offset + alphabet_len] { - if swaps[next.to_usize()] != fail_id() { - *next = swaps[next.to_usize()]; - } - } - } - if swaps[self.start_id.to_usize()] != fail_id() { - self.start_id = swaps[self.start_id.to_usize()]; - } - self.max_match = S::from_usize(first_non_match - 1); - } - - fn premultiply(&mut self) -> Result<()> { - if self.premultiplied || self.state_count <= 1 { - return Ok(()); - } - - let alpha_len = self.alphabet_len(); - premultiply_overflow_error( - S::from_usize(self.state_count - 1), - alpha_len, - )?; - - for id in (2..self.state_count).map(S::from_usize) { - let offset = id.to_usize() * alpha_len; - for next in &mut self.trans[offset..offset + alpha_len] { - if *next == dead_id() { - continue; - } - *next = S::from_usize(next.to_usize() * alpha_len); - } - } - self.premultiplied = true; - self.start_id = S::from_usize(self.start_id.to_usize() * alpha_len); - self.max_match = S::from_usize(self.max_match.to_usize() * alpha_len); - Ok(()) - 
} - - /// Computes the total amount of heap used by this NFA in bytes. - fn calculate_size(&mut self) { - let mut size = (self.trans.len() * size_of::()) - + (self.matches.len() - * size_of::>()); - for state_matches in &self.matches { - size += - state_matches.len() * size_of::<(PatternID, PatternLength)>(); - } - size += self.prefilter.as_ref().map_or(0, |p| p.as_ref().heap_bytes()); - self.heap_bytes = size; - } -} - -/// A builder for configuring the determinization of an NFA into a DFA. -#[derive(Clone, Debug)] -pub struct Builder { - premultiply: bool, - byte_classes: bool, -} - -impl Builder { - /// Create a new builder for a DFA. - pub fn new() -> Builder { - Builder { premultiply: true, byte_classes: true } - } - - /// Build a DFA from the given NFA. - /// - /// This returns an error if the state identifiers exceed their - /// representation size. This can only happen when state ids are - /// premultiplied (which is enabled by default). - pub fn build(&self, nfa: &NFA) -> Result> { - let byte_classes = if self.byte_classes { - nfa.byte_classes().clone() - } else { - ByteClasses::singletons() - }; - let alphabet_len = byte_classes.alphabet_len(); - let trans = vec![fail_id(); alphabet_len * nfa.state_len()]; - let matches = vec![vec![]; nfa.state_len()]; - let mut repr = Repr { - match_kind: nfa.match_kind().clone(), - anchored: nfa.anchored(), - premultiplied: false, - start_id: nfa.start_state(), - max_pattern_len: nfa.max_pattern_len(), - pattern_count: nfa.pattern_count(), - state_count: nfa.state_len(), - max_match: fail_id(), - heap_bytes: 0, - prefilter: nfa.prefilter_obj().map(|p| p.clone()), - byte_classes: byte_classes.clone(), - trans, - matches, - }; - for id in (0..nfa.state_len()).map(S::from_usize) { - repr.matches[id.to_usize()].extend_from_slice(nfa.matches(id)); - - let fail = nfa.failure_transition(id); - nfa.iter_all_transitions(&byte_classes, id, |b, mut next| { - if next == fail_id() { - next = nfa_next_state_memoized(nfa, &repr, id, 
fail, b); - } - repr.set_next_state(id, b, next); - }); - } - repr.shuffle_match_states(); - repr.calculate_size(); - if self.premultiply { - repr.premultiply()?; - if byte_classes.is_singleton() { - Ok(DFA::Premultiplied(Premultiplied(repr))) - } else { - Ok(DFA::PremultipliedByteClass(PremultipliedByteClass(repr))) - } - } else { - if byte_classes.is_singleton() { - Ok(DFA::Standard(Standard(repr))) - } else { - Ok(DFA::ByteClass(ByteClass(repr))) - } - } - } - - /// Whether to use byte classes or in the DFA. - pub fn byte_classes(&mut self, yes: bool) -> &mut Builder { - self.byte_classes = yes; - self - } - - /// Whether to premultiply state identifier in the DFA. - pub fn premultiply(&mut self, yes: bool) -> &mut Builder { - self.premultiply = yes; - self - } -} - -/// This returns the next NFA transition (including resolving failure -/// transitions), except once it sees a state id less than the id of the DFA -/// state that is currently being populated, then we no longer need to follow -/// failure transitions and can instead query the pre-computed state id from -/// the DFA itself. -/// -/// In general, this should only be called when a failure transition is seen. 
-fn nfa_next_state_memoized( - nfa: &NFA, - dfa: &Repr, - populating: S, - mut current: S, - input: u8, -) -> S { - loop { - if current < populating { - return dfa.next_state(current, input); - } - let next = nfa.next_state(current, input); - if next != fail_id() { - return next; - } - current = nfa.failure_transition(current); - } -} diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/error.rs clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/error.rs --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/error.rs 2023-02-13 06:00:43.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/error.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,101 +0,0 @@ -use std::error; -use std::fmt; -use std::result; - -pub type Result = result::Result; - -/// An error that occurred during the construction of an Aho-Corasick -/// automaton. -#[derive(Clone, Debug)] -pub struct Error { - kind: ErrorKind, -} - -/// The kind of error that occurred. -#[derive(Clone, Debug)] -pub enum ErrorKind { - /// An error that occurs when constructing an automaton would require the - /// use of a state ID that overflows the chosen state ID representation. - /// For example, if one is using `u8` for state IDs and builds a DFA with - /// 257 states, then the last state's ID will be `256` which cannot be - /// represented with `u8`. - StateIDOverflow { - /// The maximum possible state ID. - max: usize, - }, - /// An error that occurs when premultiplication of state IDs is requested - /// when constructing an Aho-Corasick DFA, but doing so would overflow the - /// chosen state ID representation. - /// - /// When `max == requested_max`, then the state ID would overflow `usize`. - PremultiplyOverflow { - /// The maximum possible state id. - max: usize, - /// The maximum ID required by premultiplication. - requested_max: usize, - }, -} - -impl Error { - /// Return the kind of this error. 
- pub fn kind(&self) -> &ErrorKind { - &self.kind - } - - pub(crate) fn state_id_overflow(max: usize) -> Error { - Error { kind: ErrorKind::StateIDOverflow { max } } - } - - pub(crate) fn premultiply_overflow( - max: usize, - requested_max: usize, - ) -> Error { - Error { kind: ErrorKind::PremultiplyOverflow { max, requested_max } } - } -} - -impl error::Error for Error { - fn description(&self) -> &str { - match self.kind { - ErrorKind::StateIDOverflow { .. } => { - "state id representation too small" - } - ErrorKind::PremultiplyOverflow { .. } => { - "state id representation too small for premultiplication" - } - } - } -} - -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self.kind { - ErrorKind::StateIDOverflow { max } => write!( - f, - "building the automaton failed because it required \ - building more states that can be identified, where the \ - maximum ID for the chosen representation is {}", - max, - ), - ErrorKind::PremultiplyOverflow { max, requested_max } => { - if max == requested_max { - write!( - f, - "premultiplication of states requires the ability to \ - represent a state ID greater than what can fit on \ - this platform's usize, which is {}", - ::std::usize::MAX, - ) - } else { - write!( - f, - "premultiplication of states requires the ability to \ - represent at least a state ID of {}, but the chosen \ - representation only permits a maximum state ID of {}", - requested_max, max, - ) - } - } - } - } -} diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/lib.rs clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/lib.rs --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/lib.rs 2023-02-13 06:00:43.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/lib.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,303 +0,0 @@ -/*! -A library for finding occurrences of many patterns at once. 
This library -provides multiple pattern search principally through an implementation of the -[Aho-Corasick algorithm](https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm), -which builds a fast finite state machine for executing searches in linear time. - -Additionally, this library provides a number of configuration options for -building the automaton that permit controlling the space versus time trade -off. Other features include simple ASCII case insensitive matching, finding -overlapping matches, replacements, searching streams and even searching and -replacing text in streams. - -Finally, unlike all other (known) Aho-Corasick implementations, this one -supports enabling -[leftmost-first](enum.MatchKind.html#variant.LeftmostFirst) -or -[leftmost-longest](enum.MatchKind.html#variant.LeftmostFirst) -match semantics, using a (seemingly) novel alternative construction algorithm. -For more details on what match semantics means, see the -[`MatchKind`](enum.MatchKind.html) -type. - -# Overview - -This section gives a brief overview of the primary types in this crate: - -* [`AhoCorasick`](struct.AhoCorasick.html) is the primary type and represents - an Aho-Corasick automaton. This is the type you use to execute searches. -* [`AhoCorasickBuilder`](struct.AhoCorasickBuilder.html) can be used to build - an Aho-Corasick automaton, and supports configuring a number of options. -* [`Match`](struct.Match.html) represents a single match reported by an - Aho-Corasick automaton. Each match has two pieces of information: the pattern - that matched and the start and end byte offsets corresponding to the position - in the haystack at which it matched. - -Additionally, the [`packed`](packed/index.html) sub-module contains a lower -level API for using fast vectorized routines for finding a small number of -patterns in a haystack. - -# Example: basic searching - -This example shows how to search for occurrences of multiple patterns -simultaneously. 
Each match includes the pattern that matched along with the -byte offsets of the match. - -``` -use aho_corasick::AhoCorasick; - -let patterns = &["apple", "maple", "Snapple"]; -let haystack = "Nobody likes maple in their apple flavored Snapple."; - -let ac = AhoCorasick::new(patterns); -let mut matches = vec![]; -for mat in ac.find_iter(haystack) { - matches.push((mat.pattern(), mat.start(), mat.end())); -} -assert_eq!(matches, vec![ - (1, 13, 18), - (0, 28, 33), - (2, 43, 50), -]); -``` - -# Example: case insensitivity - -This is like the previous example, but matches `Snapple` case insensitively -using `AhoCorasickBuilder`: - -``` -use aho_corasick::AhoCorasickBuilder; - -let patterns = &["apple", "maple", "snapple"]; -let haystack = "Nobody likes maple in their apple flavored Snapple."; - -let ac = AhoCorasickBuilder::new() - .ascii_case_insensitive(true) - .build(patterns); -let mut matches = vec![]; -for mat in ac.find_iter(haystack) { - matches.push((mat.pattern(), mat.start(), mat.end())); -} -assert_eq!(matches, vec![ - (1, 13, 18), - (0, 28, 33), - (2, 43, 50), -]); -``` - -# Example: replacing matches in a stream - -This example shows how to execute a search and replace on a stream without -loading the entire stream into memory first. - -``` -use aho_corasick::AhoCorasick; - -# fn example() -> Result<(), ::std::io::Error> { -let patterns = &["fox", "brown", "quick"]; -let replace_with = &["sloth", "grey", "slow"]; - -// In a real example, these might be `std::fs::File`s instead. All you need to -// do is supply a pair of `std::io::Read` and `std::io::Write` implementations. 
-let rdr = "The quick brown fox."; -let mut wtr = vec![]; - -let ac = AhoCorasick::new(patterns); -ac.stream_replace_all(rdr.as_bytes(), &mut wtr, replace_with)?; -assert_eq!(b"The slow grey sloth.".to_vec(), wtr); -# Ok(()) }; example().unwrap() -``` - -# Example: finding the leftmost first match - -In the textbook description of Aho-Corasick, its formulation is typically -structured such that it reports all possible matches, even when they overlap -with another. In many cases, overlapping matches may not be desired, such as -the case of finding all successive non-overlapping matches like you might with -a standard regular expression. - -Unfortunately the "obvious" way to modify the Aho-Corasick algorithm to do -this doesn't always work in the expected way, since it will report matches as -soon as they are seen. For example, consider matching the regex `Samwise|Sam` -against the text `Samwise`. Most regex engines (that are Perl-like, or -non-POSIX) will report `Samwise` as a match, but the standard Aho-Corasick -algorithm modified for reporting non-overlapping matches will report `Sam`. - -A novel contribution of this library is the ability to change the match -semantics of Aho-Corasick (without additional search time overhead) such that -`Samwise` is reported instead. 
For example, here's the standard approach: - -``` -use aho_corasick::AhoCorasick; - -let patterns = &["Samwise", "Sam"]; -let haystack = "Samwise"; - -let ac = AhoCorasick::new(patterns); -let mat = ac.find(haystack).expect("should have a match"); -assert_eq!("Sam", &haystack[mat.start()..mat.end()]); -``` - -And now here's the leftmost-first version, which matches how a Perl-like -regex will work: - -``` -use aho_corasick::{AhoCorasickBuilder, MatchKind}; - -let patterns = &["Samwise", "Sam"]; -let haystack = "Samwise"; - -let ac = AhoCorasickBuilder::new() - .match_kind(MatchKind::LeftmostFirst) - .build(patterns); -let mat = ac.find(haystack).expect("should have a match"); -assert_eq!("Samwise", &haystack[mat.start()..mat.end()]); -``` - -In addition to leftmost-first semantics, this library also supports -leftmost-longest semantics, which match the POSIX behavior of a regular -expression alternation. See -[`MatchKind`](enum.MatchKind.html) -for more details. - -# Prefilters - -While an Aho-Corasick automaton can perform admirably when compared to more -naive solutions, it is generally slower than more specialized algorithms that -are accelerated using vector instructions such as SIMD. - -For that reason, this library will internally use a "prefilter" to attempt -to accelerate searches when possible. Currently, this library has several -different algorithms it might use depending on the patterns provided. Once the -number of patterns gets too big, prefilters are no longer used. - -While a prefilter is generally good to have on by default since it works -well in the common case, it can lead to less predictable or even sub-optimal -performance in some cases. For that reason, prefilters can be explicitly -disabled via -[`AhoCorasickBuilder::prefilter`](struct.AhoCorasickBuilder.html#method.prefilter). -*/ - -#![deny(missing_docs)] - -// We can never be truly no_std, but we could be alloc-only some day, so -// require the std feature for now. 
-#[cfg(not(feature = "std"))] -compile_error!("`std` feature is currently required to build this crate"); - -// #[cfg(doctest)] -// #[macro_use] -// extern crate doc_comment; - -// #[cfg(doctest)] -// doctest!("../README.md"); - -pub use crate::ahocorasick::{ - AhoCorasick, AhoCorasickBuilder, FindIter, FindOverlappingIter, MatchKind, - StreamFindIter, -}; -pub use crate::error::{Error, ErrorKind}; -pub use crate::state_id::StateID; - -mod ahocorasick; -mod automaton; -mod buffer; -mod byte_frequencies; -mod classes; -mod dfa; -mod error; -mod nfa; -pub mod packed; -mod prefilter; -mod state_id; -#[cfg(test)] -mod tests; - -/// A representation of a match reported by an Aho-Corasick automaton. -/// -/// A match has two essential pieces of information: the identifier of the -/// pattern that matched, along with the start and end offsets of the match -/// in the haystack. -/// -/// # Examples -/// -/// Basic usage: -/// -/// ``` -/// use aho_corasick::AhoCorasick; -/// -/// let ac = AhoCorasick::new(&[ -/// "foo", "bar", "baz", -/// ]); -/// let mat = ac.find("xxx bar xxx").expect("should have a match"); -/// assert_eq!(1, mat.pattern()); -/// assert_eq!(4, mat.start()); -/// assert_eq!(7, mat.end()); -/// ``` -#[derive(Clone, Debug, Eq, Hash, PartialEq)] -pub struct Match { - /// The pattern id. - pattern: usize, - /// The length of this match, such that the starting position of the match - /// is `end - len`. - /// - /// We use length here because, other than the pattern id, the only - /// information about each pattern that the automaton stores is its length. - /// So using the length here is just a bit more natural. But it isn't - /// technically required. - len: usize, - /// The end offset of the match, exclusive. - end: usize, -} - -impl Match { - /// Returns the identifier of the pattern that matched. - /// - /// The identifier of a pattern is derived from the position in which it - /// was originally inserted into the corresponding automaton. 
The first - /// pattern has identifier `0`, and each subsequent pattern is `1`, `2` - /// and so on. - #[inline] - pub fn pattern(&self) -> usize { - self.pattern - } - - /// The starting position of the match. - #[inline] - pub fn start(&self) -> usize { - self.end - self.len - } - - /// The ending position of the match. - #[inline] - pub fn end(&self) -> usize { - self.end - } - - /// The length, in bytes, of the match. - #[inline] - pub fn len(&self) -> usize { - self.len - } - - /// Returns true if and only if this match is empty. That is, when - /// `start() == end()`. - /// - /// An empty match can only be returned when the empty string was among - /// the patterns used to build the Aho-Corasick automaton. - #[inline] - pub fn is_empty(&self) -> bool { - self.len == 0 - } - - #[inline] - fn increment(&self, by: usize) -> Match { - Match { pattern: self.pattern, len: self.len, end: self.end + by } - } - - #[inline] - fn from_span(id: usize, start: usize, end: usize) -> Match { - Match { pattern: id, len: end - start, end } - } -} diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/nfa.rs clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/nfa.rs --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/nfa.rs 2023-02-13 06:00:43.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/nfa.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,1214 +0,0 @@ -use std::cmp; -use std::collections::{BTreeSet, VecDeque}; -use std::fmt; -use std::mem::size_of; -use std::ops::{Index, IndexMut}; - -use crate::ahocorasick::MatchKind; -use crate::automaton::Automaton; -use crate::classes::{ByteClassBuilder, ByteClasses}; -use crate::error::Result; -use crate::prefilter::{self, opposite_ascii_case, Prefilter, PrefilterObj}; -use crate::state_id::{dead_id, fail_id, usize_to_state_id, StateID}; -use crate::Match; - -/// The identifier for a pattern, which is simply the position of the pattern -/// in the sequence 
of patterns given by the caller. -pub type PatternID = usize; - -/// The length of a pattern, in bytes. -pub type PatternLength = usize; - -/// An Aho-Corasick automaton, represented as an NFA. -/// -/// This is the classical formulation of Aho-Corasick, which involves building -/// up a prefix trie of a given set of patterns, and then wiring up failure -/// transitions between states in order to guarantee linear time matching. The -/// standard formulation is, technically, an NFA because of these failure -/// transitions. That is, one can see them as enabling the automaton to be in -/// multiple states at once. Indeed, during search, it is possible to check -/// the transitions on multiple states for a single input byte. -/// -/// This particular implementation not only supports the standard style of -/// matching, but also provides a mode for choosing leftmost-first or -/// leftmost-longest match semantics. When a leftmost mode is chosen, some -/// failure transitions that would otherwise be added are elided. See -/// the documentation of `MatchKind` for more details and examples on how the -/// match semantics may differ. -/// -/// If one wants a DFA, then it is necessary to first build an NFA and convert -/// it into a DFA. Note, however, that because we've constrained ourselves to -/// matching literal patterns, this does not need to use subset construction -/// for determinization. Instead, the DFA has at most a number of states -/// equivalent to the number of NFA states. The only real difference between -/// them is that all failure transitions are followed and pre-computed. This -/// uses much more memory, but also executes searches more quickly. -#[derive(Clone)] -pub struct NFA { - /// The match semantics built into this NFA. - match_kind: MatchKind, - /// The start state id as an index into `states`. - start_id: S, - /// The length, in bytes, of the longest pattern in this automaton. 
This - /// information is useful for keeping correct buffer sizes when searching - /// on streams. - max_pattern_len: usize, - /// The total number of patterns added to this automaton, including - /// patterns that may never be matched. - pattern_count: usize, - /// The number of bytes of heap used by this NFA's transition table. - heap_bytes: usize, - /// A prefilter for quickly skipping to candidate matches, if pertinent. - prefilter: Option, - /// Whether this automaton anchors all matches to the start of input. - anchored: bool, - /// A set of equivalence classes in terms of bytes. We compute this while - /// building the NFA, but don't use it in the NFA's states. Instead, we - /// use this for building the DFA. We store it on the NFA since it's easy - /// to compute while visiting the patterns. - byte_classes: ByteClasses, - /// A set of states. Each state defines its own transitions, a fail - /// transition and a set of indices corresponding to matches. - /// - /// The first state is always the fail state, which is used only as a - /// sentinel. Namely, in the final NFA, no transition into the fail state - /// exists. (Well, they do, but they aren't followed. Instead, the state's - /// failure transition is followed.) - /// - /// The second state (index 1) is always the dead state. Dead states are - /// in every automaton, but only used when leftmost-{first,longest} match - /// semantics are enabled. Specifically, they instruct search to stop - /// at specific points in order to report the correct match location. In - /// the standard Aho-Corasick construction, there are no transitions to - /// the dead state. - /// - /// The third state (index 2) is generally intended to be the starting or - /// "root" state. - states: Vec>, -} - -impl NFA { - /// Returns the equivalence classes of bytes found while constructing - /// this NFA. - /// - /// Note that the NFA doesn't actually make use of these equivalence - /// classes. 
Instead, these are useful for building the DFA when desired. - pub fn byte_classes(&self) -> &ByteClasses { - &self.byte_classes - } - - /// Returns a prefilter, if one exists. - pub fn prefilter_obj(&self) -> Option<&PrefilterObj> { - self.prefilter.as_ref() - } - - /// Returns the total number of heap bytes used by this NFA's transition - /// table. - pub fn heap_bytes(&self) -> usize { - self.heap_bytes - + self.prefilter.as_ref().map_or(0, |p| p.as_ref().heap_bytes()) - } - - /// Return the length of the longest pattern in this automaton. - pub fn max_pattern_len(&self) -> usize { - self.max_pattern_len - } - - /// Return the total number of patterns added to this automaton. - pub fn pattern_count(&self) -> usize { - self.pattern_count - } - - /// Returns the total number of states in this NFA. - pub fn state_len(&self) -> usize { - self.states.len() - } - - /// Returns the matches for the given state. - pub fn matches(&self, id: S) -> &[(PatternID, PatternLength)] { - &self.states[id.to_usize()].matches - } - - /// Returns an iterator over all transitions in the given state according - /// to the given equivalence classes, including transitions to `fail_id()`. - /// The number of transitions returned is always equivalent to the number - /// of equivalence classes. - pub fn iter_all_transitions( - &self, - byte_classes: &ByteClasses, - id: S, - f: F, - ) { - self.states[id.to_usize()].trans.iter_all(byte_classes, f); - } - - /// Returns the failure transition for the given state. - pub fn failure_transition(&self, id: S) -> S { - self.states[id.to_usize()].fail - } - - /// Returns the next state for the given state and input byte. - /// - /// Note that this does not follow failure transitions. As such, the id - /// returned may be `fail_id`. 
- pub fn next_state(&self, current: S, input: u8) -> S { - self.states[current.to_usize()].next_state(input) - } - - fn state(&self, id: S) -> &State { - &self.states[id.to_usize()] - } - - fn state_mut(&mut self, id: S) -> &mut State { - &mut self.states[id.to_usize()] - } - - fn start(&self) -> &State { - self.state(self.start_id) - } - - fn start_mut(&mut self) -> &mut State { - let id = self.start_id; - self.state_mut(id) - } - - fn iter_transitions_mut(&mut self, id: S) -> IterTransitionsMut<'_, S> { - IterTransitionsMut::new(self, id) - } - - fn copy_matches(&mut self, src: S, dst: S) { - let (src, dst) = - get_two_mut(&mut self.states, src.to_usize(), dst.to_usize()); - dst.matches.extend_from_slice(&src.matches); - } - - fn copy_empty_matches(&mut self, dst: S) { - let start_id = self.start_id; - self.copy_matches(start_id, dst); - } - - fn add_dense_state(&mut self, depth: usize) -> Result { - let trans = Transitions::Dense(Dense::new()); - let id = usize_to_state_id(self.states.len())?; - self.states.push(State { - trans, - // Anchored automatons do not have any failure transitions. - fail: if self.anchored { dead_id() } else { self.start_id }, - depth, - matches: vec![], - }); - Ok(id) - } - - fn add_sparse_state(&mut self, depth: usize) -> Result { - let trans = Transitions::Sparse(vec![]); - let id = usize_to_state_id(self.states.len())?; - self.states.push(State { - trans, - // Anchored automatons do not have any failure transitions. 
- fail: if self.anchored { dead_id() } else { self.start_id }, - depth, - matches: vec![], - }); - Ok(id) - } -} - -impl Automaton for NFA { - type ID = S; - - fn match_kind(&self) -> &MatchKind { - &self.match_kind - } - - fn anchored(&self) -> bool { - self.anchored - } - - fn prefilter(&self) -> Option<&dyn Prefilter> { - self.prefilter.as_ref().map(|p| p.as_ref()) - } - - fn start_state(&self) -> S { - self.start_id - } - - fn is_valid(&self, id: S) -> bool { - id.to_usize() < self.states.len() - } - - fn is_match_state(&self, id: S) -> bool { - self.states[id.to_usize()].is_match() - } - - fn get_match( - &self, - id: S, - match_index: usize, - end: usize, - ) -> Option { - let state = match self.states.get(id.to_usize()) { - None => return None, - Some(state) => state, - }; - state.matches.get(match_index).map(|&(id, len)| Match { - pattern: id, - len, - end, - }) - } - - fn match_count(&self, id: S) -> usize { - self.states[id.to_usize()].matches.len() - } - - fn next_state(&self, mut current: S, input: u8) -> S { - // This terminates since: - // - // 1. `State.fail` never points to fail_id(). - // 2. All `State.fail` values point to a state closer to `start`. - // 3. The start state has no transitions to fail_id(). - loop { - let state = &self.states[current.to_usize()]; - let next = state.next_state(input); - if next != fail_id() { - return next; - } - current = state.fail; - } - } -} - -/// A representation of an NFA state for an Aho-Corasick automaton. -/// -/// It contains the transitions to the next state, a failure transition for -/// cases where there exists no other transition for the current input byte, -/// the matches implied by visiting this state (if any) and the depth of this -/// state. The depth of a state is simply the distance from it to the start -/// state in the automaton, where the depth of the start state is 0. 
-#[derive(Clone, Debug)] -pub struct State { - trans: Transitions, - fail: S, - matches: Vec<(PatternID, PatternLength)>, - // TODO: Strictly speaking, this isn't needed for searching. It's only - // used when building an NFA that supports leftmost match semantics. We - // could drop this from the state and dynamically build a map only when - // computing failure transitions, but it's not clear which is better. - // Benchmark this. - depth: usize, -} - -impl State { - fn heap_bytes(&self) -> usize { - self.trans.heap_bytes() - + (self.matches.len() * size_of::<(PatternID, PatternLength)>()) - } - - fn add_match(&mut self, i: PatternID, len: PatternLength) { - self.matches.push((i, len)); - } - - fn is_match(&self) -> bool { - !self.matches.is_empty() - } - - fn next_state(&self, input: u8) -> S { - self.trans.next_state(input) - } - - fn set_next_state(&mut self, input: u8, next: S) { - self.trans.set_next_state(input, next); - } -} - -/// Represents the transitions for a single dense state. -/// -/// The primary purpose here is to encapsulate index access. Namely, since a -/// dense representation always contains 256 elements, all values of `u8` are -/// valid indices. -#[derive(Clone, Debug)] -struct Dense(Vec); - -impl Dense -where - S: StateID, -{ - fn new() -> Self { - Dense(vec![fail_id(); 256]) - } - - #[inline] - fn len(&self) -> usize { - self.0.len() - } -} - -impl Index for Dense { - type Output = S; - - #[inline] - fn index(&self, i: u8) -> &S { - // SAFETY: This is safe because all dense transitions have - // exactly 256 elements, so all u8 values are valid indices. - &self.0[i as usize] - } -} - -impl IndexMut for Dense { - #[inline] - fn index_mut(&mut self, i: u8) -> &mut S { - // SAFETY: This is safe because all dense transitions have - // exactly 256 elements, so all u8 values are valid indices. - &mut self.0[i as usize] - } -} - -/// A representation of transitions in an NFA. 
-/// -/// Transitions have either a sparse representation, which is slower for -/// lookups but uses less memory, or a dense representation, which is faster -/// for lookups but uses more memory. In the sparse representation, the absence -/// of a state implies a transition to `fail_id()`. Transitions to `dead_id()` -/// are still explicitly represented. -/// -/// For the NFA, by default, we use a dense representation for transitions for -/// states close to the start state because it's likely these are the states -/// that will be most frequently visited. -#[derive(Clone, Debug)] -enum Transitions { - Sparse(Vec<(u8, S)>), - Dense(Dense), -} - -impl Transitions { - fn heap_bytes(&self) -> usize { - match *self { - Transitions::Sparse(ref sparse) => { - sparse.len() * size_of::<(u8, S)>() - } - Transitions::Dense(ref dense) => dense.len() * size_of::(), - } - } - - fn next_state(&self, input: u8) -> S { - match *self { - Transitions::Sparse(ref sparse) => { - for &(b, id) in sparse { - if b == input { - return id; - } - } - fail_id() - } - Transitions::Dense(ref dense) => dense[input], - } - } - - fn set_next_state(&mut self, input: u8, next: S) { - match *self { - Transitions::Sparse(ref mut sparse) => { - match sparse.binary_search_by_key(&input, |&(b, _)| b) { - Ok(i) => sparse[i] = (input, next), - Err(i) => sparse.insert(i, (input, next)), - } - } - Transitions::Dense(ref mut dense) => { - dense[input] = next; - } - } - } - - /// Iterate over transitions in this state while skipping over transitions - /// to `fail_id()`. - fn iter(&self, mut f: F) { - match *self { - Transitions::Sparse(ref sparse) => { - for &(b, id) in sparse { - f(b, id); - } - } - Transitions::Dense(ref dense) => { - for b in AllBytesIter::new() { - let id = dense[b]; - if id != fail_id() { - f(b, id); - } - } - } - } - } - - /// Iterate over all transitions in this state according to the given - /// equivalence classes, including transitions to `fail_id()`. 
- fn iter_all(&self, classes: &ByteClasses, mut f: F) { - if classes.is_singleton() { - match *self { - Transitions::Sparse(ref sparse) => { - sparse_iter(sparse, f); - } - Transitions::Dense(ref dense) => { - for b in AllBytesIter::new() { - f(b, dense[b]); - } - } - } - } else { - // In this case, we only want to yield a single byte for each - // equivalence class. - match *self { - Transitions::Sparse(ref sparse) => { - let mut last_class = None; - sparse_iter(sparse, |b, next| { - let class = classes.get(b); - if last_class != Some(class) { - last_class = Some(class); - f(b, next); - } - }) - } - Transitions::Dense(ref dense) => { - for b in classes.representatives() { - f(b, dense[b]); - } - } - } - } - } -} - -/// Iterator over transitions in a state, skipping transitions to `fail_id()`. -/// -/// This abstracts over the representation of NFA transitions, which may be -/// either in a sparse or dense representation. -/// -/// This somewhat idiosyncratically borrows the NFA mutably, so that when one -/// is iterating over transitions, the caller can still mutate the NFA. This -/// is useful when creating failure transitions. -#[derive(Debug)] -struct IterTransitionsMut<'a, S: StateID> { - nfa: &'a mut NFA, - state_id: S, - cur: usize, -} - -impl<'a, S: StateID> IterTransitionsMut<'a, S> { - fn new(nfa: &'a mut NFA, state_id: S) -> IterTransitionsMut<'a, S> { - IterTransitionsMut { nfa, state_id, cur: 0 } - } - - fn nfa(&mut self) -> &mut NFA { - self.nfa - } -} - -impl<'a, S: StateID> Iterator for IterTransitionsMut<'a, S> { - type Item = (u8, S); - - fn next(&mut self) -> Option<(u8, S)> { - match self.nfa.states[self.state_id.to_usize()].trans { - Transitions::Sparse(ref sparse) => { - if self.cur >= sparse.len() { - return None; - } - let i = self.cur; - self.cur += 1; - Some(sparse[i]) - } - Transitions::Dense(ref dense) => { - while self.cur < dense.len() { - // There are always exactly 255 transitions in dense repr. 
- debug_assert!(self.cur < 256); - - let b = self.cur as u8; - let id = dense[b]; - self.cur += 1; - if id != fail_id() { - return Some((b, id)); - } - } - None - } - } - } -} - -/// A simple builder for configuring the NFA construction of Aho-Corasick. -#[derive(Clone, Debug)] -pub struct Builder { - dense_depth: usize, - match_kind: MatchKind, - prefilter: bool, - anchored: bool, - ascii_case_insensitive: bool, -} - -impl Default for Builder { - fn default() -> Builder { - Builder { - dense_depth: 2, - match_kind: MatchKind::default(), - prefilter: true, - anchored: false, - ascii_case_insensitive: false, - } - } -} - -impl Builder { - pub fn new() -> Builder { - Builder::default() - } - - pub fn build(&self, patterns: I) -> Result> - where - I: IntoIterator, - P: AsRef<[u8]>, - { - Compiler::new(self)?.compile(patterns) - } - - pub fn match_kind(&mut self, kind: MatchKind) -> &mut Builder { - self.match_kind = kind; - self - } - - pub fn dense_depth(&mut self, depth: usize) -> &mut Builder { - self.dense_depth = depth; - self - } - - pub fn prefilter(&mut self, yes: bool) -> &mut Builder { - self.prefilter = yes; - self - } - - pub fn anchored(&mut self, yes: bool) -> &mut Builder { - self.anchored = yes; - self - } - - pub fn ascii_case_insensitive(&mut self, yes: bool) -> &mut Builder { - self.ascii_case_insensitive = yes; - self - } -} - -/// A compiler uses a builder configuration and builds up the NFA formulation -/// of an Aho-Corasick automaton. This roughly corresponds to the standard -/// formulation described in textbooks. 
-#[derive(Debug)] -struct Compiler<'a, S: StateID> { - builder: &'a Builder, - prefilter: prefilter::Builder, - nfa: NFA, - byte_classes: ByteClassBuilder, -} - -impl<'a, S: StateID> Compiler<'a, S> { - fn new(builder: &'a Builder) -> Result> { - Ok(Compiler { - builder, - prefilter: prefilter::Builder::new(builder.match_kind) - .ascii_case_insensitive(builder.ascii_case_insensitive), - nfa: NFA { - match_kind: builder.match_kind, - start_id: usize_to_state_id(2)?, - max_pattern_len: 0, - pattern_count: 0, - heap_bytes: 0, - prefilter: None, - anchored: builder.anchored, - byte_classes: ByteClasses::singletons(), - states: vec![], - }, - byte_classes: ByteClassBuilder::new(), - }) - } - - fn compile(mut self, patterns: I) -> Result> - where - I: IntoIterator, - P: AsRef<[u8]>, - { - self.add_state(0)?; // the fail state, which is never entered - self.add_state(0)?; // the dead state, only used for leftmost - self.add_state(0)?; // the start state - self.build_trie(patterns)?; - self.add_start_state_loop(); - self.add_dead_state_loop(); - if !self.builder.anchored { - self.fill_failure_transitions(); - } - self.close_start_state_loop(); - self.nfa.byte_classes = self.byte_classes.build(); - if !self.builder.anchored { - self.nfa.prefilter = self.prefilter.build(); - } - self.calculate_size(); - Ok(self.nfa) - } - - /// This sets up the initial prefix trie that makes up the Aho-Corasick - /// automaton. Effectively, it creates the basic structure of the - /// automaton, where every pattern given has a path from the start state to - /// the end of the pattern. 
- fn build_trie(&mut self, patterns: I) -> Result<()> - where - I: IntoIterator, - P: AsRef<[u8]>, - { - 'PATTERNS: for (pati, pat) in patterns.into_iter().enumerate() { - let pat = pat.as_ref(); - self.nfa.max_pattern_len = - cmp::max(self.nfa.max_pattern_len, pat.len()); - self.nfa.pattern_count += 1; - - let mut prev = self.nfa.start_id; - let mut saw_match = false; - for (depth, &b) in pat.iter().enumerate() { - // When leftmost-first match semantics are requested, we - // specifically stop adding patterns when a previously added - // pattern is a prefix of it. We avoid adding it because - // leftmost-first semantics imply that the pattern can never - // match. This is not just an optimization to save space! It - // is necessary for correctness. In fact, this is the only - // difference in the automaton between the implementations for - // leftmost-first and leftmost-longest. - saw_match = saw_match || self.nfa.state(prev).is_match(); - if self.builder.match_kind.is_leftmost_first() && saw_match { - // Skip to the next pattern immediately. This avoids - // incorrectly adding a match after this loop terminates. - continue 'PATTERNS; - } - - // Add this byte to our equivalence classes. We don't use these - // for NFA construction. These are instead used only if we're - // building a DFA. They would technically be useful for the - // NFA, but it would require a second pass over the patterns. - self.byte_classes.set_range(b, b); - if self.builder.ascii_case_insensitive { - let b = opposite_ascii_case(b); - self.byte_classes.set_range(b, b); - } - - // If the transition from prev using the current byte already - // exists, then just move through it. Otherwise, add a new - // state. We track the depth here so that we can determine - // how to represent transitions. States near the start state - // use a dense representation that uses more memory but is - // faster. Other states use a sparse representation that uses - // less memory but is slower. 
- let next = self.nfa.state(prev).next_state(b); - if next != fail_id() { - prev = next; - } else { - let next = self.add_state(depth + 1)?; - self.nfa.state_mut(prev).set_next_state(b, next); - if self.builder.ascii_case_insensitive { - let b = opposite_ascii_case(b); - self.nfa.state_mut(prev).set_next_state(b, next); - } - prev = next; - } - } - // Once the pattern has been added, log the match in the final - // state that it reached. - self.nfa.state_mut(prev).add_match(pati, pat.len()); - // ... and hand it to the prefilter builder, if applicable. - if self.builder.prefilter { - self.prefilter.add(pat); - } - } - Ok(()) - } - - /// This routine creates failure transitions according to the standard - /// textbook formulation of the Aho-Corasick algorithm, with a couple small - /// tweaks to support "leftmost" semantics. - /// - /// Building failure transitions is the most interesting part of building - /// the Aho-Corasick automaton, because they are what allow searches to - /// be performed in linear time. Specifically, a failure transition is - /// a single transition associated with each state that points back to - /// the longest proper suffix of the pattern being searched. The failure - /// transition is followed whenever there exists no transition on the - /// current state for the current input byte. If there is no other proper - /// suffix, then the failure transition points back to the starting state. - /// - /// For example, let's say we built an Aho-Corasick automaton with the - /// following patterns: 'abcd' and 'cef'. The trie looks like this: - /// - /// ```ignore - /// a - S1 - b - S2 - c - S3 - d - S4* - /// / - /// S0 - c - S5 - e - S6 - f - S7* - /// ``` - /// - /// At this point, it should be fairly straight-forward to see how this - /// trie can be used in a simplistic way. 
At any given position in the - /// text we're searching (called the "subject" string), all we need to do - /// is follow the transitions in the trie by consuming one transition for - /// each byte in the subject string. If we reach a match state, then we can - /// report that location as a match. - /// - /// The trick comes when searching a subject string like 'abcef'. We'll - /// initially follow the transition from S0 to S1 and wind up in S3 after - /// observng the 'c' byte. At this point, the next byte is 'e' but state - /// S3 has no transition for 'e', so the search fails. We then would need - /// to restart the search at the next position in 'abcef', which - /// corresponds to 'b'. The match would fail, but the next search starting - /// at 'c' would finally succeed. The problem with this approach is that - /// we wind up searching the subject string potentially many times. In - /// effect, this makes the algorithm have worst case `O(n * m)` complexity, - /// where `n ~ len(subject)` and `m ~ len(all patterns)`. We would instead - /// like to achieve a `O(n + m)` worst case complexity. - /// - /// This is where failure transitions come in. Instead of dying at S3 in - /// the first search, the automaton can instruct the search to move to - /// another part of the automaton that corresponds to a suffix of what - /// we've seen so far. Recall that we've seen 'abc' in the subject string, - /// and the automaton does indeed have a non-empty suffix, 'c', that could - /// potentially lead to another match. 
Thus, the actual Aho-Corasick - /// automaton for our patterns in this case looks like this: - /// - /// ```ignore - /// a - S1 - b - S2 - c - S3 - d - S4* - /// / / - /// / ---------------- - /// / / - /// S0 - c - S5 - e - S6 - f - S7* - /// ``` - /// - /// That is, we have a failure transition from S3 to S5, which is followed - /// exactly in cases when we are in state S3 but see any byte other than - /// 'd' (that is, we've "failed" to find a match in this portion of our - /// trie). We know we can transition back to S5 because we've already seen - /// a 'c' byte, so we don't need to re-scan it. We can then pick back up - /// with the search starting at S5 and complete our match. - /// - /// Adding failure transitions to a trie is fairly simple, but subtle. The - /// key issue is that you might have multiple failure transition that you - /// need to follow. For example, look at the trie for the patterns - /// 'abcd', 'b', 'bcd' and 'cd': - /// - /// ```ignore - /// - a - S1 - b - S2* - c - S3 - d - S4* - /// / / / - /// / ------- ------- - /// / / / - /// S0 --- b - S5* - c - S6 - d - S7* - /// \ / - /// \ -------- - /// \ / - /// - c - S8 - d - S9* - /// ``` - /// - /// The failure transitions for this trie are defined from S2 to S5, - /// S3 to S6 and S6 to S8. Moreover, state S2 needs to track that it - /// corresponds to a match, since its failure transition to S5 is itself - /// a match state. - /// - /// Perhaps simplest way to think about adding these failure transitions - /// is recursively. That is, if you know the failure transitions for every - /// possible previous state that could be visited (e.g., when computing the - /// failure transition for S3, you already know the failure transitions - /// for S0, S1 and S2), then you can simply follow the failure transition - /// of the previous state and check whether the incoming transition is - /// defined after following the failure transition. 
- /// - /// For example, when determining the failure state for S3, by our - /// assumptions, we already know that there is a failure transition from - /// S2 (the previous state) to S5. So we follow that transition and check - /// whether the transition connecting S2 to S3 is defined. Indeed, it is, - /// as there is a transition from S5 to S6 for the byte 'c'. If no such - /// transition existed, we could keep following the failure transitions - /// until we reach the start state, which is the failure transition for - /// every state that has no corresponding proper suffix. - /// - /// We don't actually use recursion to implement this, but instead, use a - /// breadth first search of the automaton. Our base case is the start - /// state, whose failure transition is just a transition to itself. - /// - /// When building a leftmost automaton, we proceed as above, but only - /// include a subset of failure transitions. Namely, we omit any failure - /// transitions that appear after a match state in the trie. This is - /// because failure transitions always point back to a proper suffix of - /// what has been seen so far. Thus, following a failure transition after - /// a match implies looking for a match that starts after the one that has - /// already been seen, which is of course therefore not the leftmost match. - /// - /// N.B. I came up with this algorithm on my own, and after scouring all of - /// the other AC implementations I know of (Perl, Snort, many on GitHub). - /// I couldn't find any that implement leftmost semantics like this. - /// Perl of course needs leftmost-first semantics, but they implement it - /// with a seeming hack at *search* time instead of encoding it into the - /// automaton. There are also a couple Java libraries that support leftmost - /// longest semantics, but they do it by building a queue of matches at - /// search time, which is even worse than what Perl is doing. 
---AG - fn fill_failure_transitions(&mut self) { - let kind = self.match_kind(); - // Initialize the queue for breadth first search with all transitions - // out of the start state. We handle the start state specially because - // we only want to follow non-self transitions. If we followed self - // transitions, then this would never terminate. - let mut queue = VecDeque::new(); - let mut seen = self.queued_set(); - let mut it = self.nfa.iter_transitions_mut(self.nfa.start_id); - while let Some((_, next)) = it.next() { - // Skip anything we've seen before and any self-transitions on the - // start state. - if next == it.nfa().start_id || seen.contains(next) { - continue; - } - queue.push_back(next); - seen.insert(next); - // Under leftmost semantics, if a state immediately following - // the start state is a match state, then we never want to - // follow its failure transition since the failure transition - // necessarily leads back to the start state, which we never - // want to do for leftmost matching after a match has been - // found. - // - // We apply the same logic to non-start states below as well. - if kind.is_leftmost() && it.nfa().state(next).is_match() { - it.nfa().state_mut(next).fail = dead_id(); - } - } - while let Some(id) = queue.pop_front() { - let mut it = self.nfa.iter_transitions_mut(id); - while let Some((b, next)) = it.next() { - if seen.contains(next) { - // The only way to visit a duplicate state in a transition - // list is when ASCII case insensitivity is enabled. In - // this case, we want to skip it since it's redundant work. - // But it would also end up duplicating matches, which - // results in reporting duplicate matches in some cases. - // See the 'acasei010' regression test. 
- continue; - } - queue.push_back(next); - seen.insert(next); - - // As above for start states, under leftmost semantics, once - // we see a match all subsequent states should have no failure - // transitions because failure transitions always imply looking - // for a match that is a suffix of what has been seen so far - // (where "seen so far" corresponds to the string formed by - // following the transitions from the start state to the - // current state). Under leftmost semantics, we specifically do - // not want to allow this to happen because we always want to - // report the match found at the leftmost position. - // - // The difference between leftmost-first and leftmost-longest - // occurs previously while we build the trie. For - // leftmost-first, we simply omit any entries that would - // otherwise require passing through a match state. - // - // Note that for correctness, the failure transition has to be - // set to the dead state for ALL states following a match, not - // just the match state itself. However, by setting the failure - // transition to the dead state on all match states, the dead - // state will automatically propagate to all subsequent states - // via the failure state computation below. - if kind.is_leftmost() && it.nfa().state(next).is_match() { - it.nfa().state_mut(next).fail = dead_id(); - continue; - } - let mut fail = it.nfa().state(id).fail; - while it.nfa().state(fail).next_state(b) == fail_id() { - fail = it.nfa().state(fail).fail; - } - fail = it.nfa().state(fail).next_state(b); - it.nfa().state_mut(next).fail = fail; - it.nfa().copy_matches(fail, next); - } - // If the start state is a match state, then this automaton can - // match the empty string. This implies all states are match states - // since every position matches the empty string, so copy the - // matches from the start state to every state. 
Strictly speaking, - // this is only necessary for overlapping matches since each - // non-empty non-start match state needs to report empty matches - // in addition to its own. For the non-overlapping case, such - // states only report the first match, which is never empty since - // it isn't a start state. - if !kind.is_leftmost() { - it.nfa().copy_empty_matches(id); - } - } - } - - /// Returns a set that tracked queued states. - /// - /// This is only necessary when ASCII case insensitivity is enabled, since - /// it is the only way to visit the same state twice. Otherwise, this - /// returns an inert set that nevers adds anything and always reports - /// `false` for every member test. - fn queued_set(&self) -> QueuedSet { - if self.builder.ascii_case_insensitive { - QueuedSet::active() - } else { - QueuedSet::inert() - } - } - - /// Set the failure transitions on the start state to loop back to the - /// start state. This effectively permits the Aho-Corasick automaton to - /// match at any position. This is also required for finding the next - /// state to terminate, namely, finding the next state should never return - /// a fail_id. - /// - /// This must be done after building the initial trie, since trie - /// construction depends on transitions to `fail_id` to determine whether a - /// state already exists or not. - fn add_start_state_loop(&mut self) { - let start_id = self.nfa.start_id; - let start = self.nfa.start_mut(); - for b in AllBytesIter::new() { - if start.next_state(b) == fail_id() { - start.set_next_state(b, start_id); - } - } - } - - /// Remove the start state loop by rewriting any transitions on the start - /// state back to the start state with transitions to the dead state. - /// - /// The loop is only closed when two conditions are met: the start state - /// is a match state and the match kind is leftmost-first or - /// leftmost-longest. 
(Alternatively, if this is an anchored automaton, - /// then the start state is always closed, regardless of aforementioned - /// conditions.) - /// - /// The reason for this is that under leftmost semantics, a start state - /// that is also a match implies that we should never restart the search - /// process. We allow normal transitions out of the start state, but if - /// none exist, we transition to the dead state, which signals that - /// searching should stop. - fn close_start_state_loop(&mut self) { - if self.builder.anchored - || (self.match_kind().is_leftmost() && self.nfa.start().is_match()) - { - let start_id = self.nfa.start_id; - let start = self.nfa.start_mut(); - for b in AllBytesIter::new() { - if start.next_state(b) == start_id { - start.set_next_state(b, dead_id()); - } - } - } - } - - /// Sets all transitions on the dead state to point back to the dead state. - /// Normally, missing transitions map back to the failure state, but the - /// point of the dead state is to act as a sink that can never be escaped. - fn add_dead_state_loop(&mut self) { - let dead = self.nfa.state_mut(dead_id()); - for b in AllBytesIter::new() { - dead.set_next_state(b, dead_id()); - } - } - - /// Computes the total amount of heap used by this NFA in bytes. - fn calculate_size(&mut self) { - let mut size = 0; - for state in &self.nfa.states { - size += size_of::>() + state.heap_bytes(); - } - self.nfa.heap_bytes = size; - } - - /// Add a new state to the underlying NFA with the given depth. The depth - /// is used to determine how to represent the transitions. - /// - /// If adding the new state would overflow the chosen state ID - /// representation, then this returns an error. - fn add_state(&mut self, depth: usize) -> Result { - if depth < self.builder.dense_depth { - self.nfa.add_dense_state(depth) - } else { - self.nfa.add_sparse_state(depth) - } - } - - /// Returns the match kind configured on the underlying builder. 
- fn match_kind(&self) -> MatchKind { - self.builder.match_kind - } -} - -/// A set of state identifiers used to avoid revisiting the same state multiple -/// times when filling in failure transitions. -/// -/// This set has an "inert" and an "active" mode. When inert, the set never -/// stores anything and always returns `false` for every member test. This is -/// useful to avoid the performance and memory overhead of maintaining this -/// set when it is not needed. -#[derive(Debug)] -struct QueuedSet { - set: Option>, -} - -impl QueuedSet { - /// Return an inert set that returns `false` for every state ID membership - /// test. - fn inert() -> QueuedSet { - QueuedSet { set: None } - } - - /// Return an active set that tracks state ID membership. - fn active() -> QueuedSet { - QueuedSet { set: Some(BTreeSet::new()) } - } - - /// Inserts the given state ID into this set. (If the set is inert, then - /// this is a no-op.) - fn insert(&mut self, state_id: S) { - if let Some(ref mut set) = self.set { - set.insert(state_id); - } - } - - /// Returns true if and only if the given state ID is in this set. If the - /// set is inert, this always returns false. - fn contains(&self, state_id: S) -> bool { - match self.set { - None => false, - Some(ref set) => set.contains(&state_id), - } - } -} - -/// An iterator over every byte value. -/// -/// We use this instead of (0..256).map(|b| b as u8) because this optimizes -/// better in debug builds. -/// -/// We also use this instead of 0..=255 because we're targeting Rust 1.24 and -/// inclusive range syntax was stabilized in Rust 1.26. We can get rid of this -/// once our MSRV is Rust 1.26 or newer. 
-#[derive(Debug)] -struct AllBytesIter(u16); - -impl AllBytesIter { - fn new() -> AllBytesIter { - AllBytesIter(0) - } -} - -impl Iterator for AllBytesIter { - type Item = u8; - - fn next(&mut self) -> Option { - if self.0 >= 256 { - None - } else { - let b = self.0 as u8; - self.0 += 1; - Some(b) - } - } -} - -impl fmt::Debug for NFA { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - writeln!(f, "NFA(")?; - writeln!(f, "match_kind: {:?}", self.match_kind)?; - writeln!(f, "prefilter: {:?}", self.prefilter)?; - writeln!(f, "{}", "-".repeat(79))?; - for (id, s) in self.states.iter().enumerate() { - let mut trans = vec![]; - s.trans.iter(|byte, next| { - // The start state has a bunch of uninteresting transitions - // back into itself. It's questionable to hide them since they - // are critical to understanding the automaton, but they are - // very noisy without better formatting for contiugous ranges - // to the same state. - if id == self.start_id.to_usize() && next == self.start_id { - return; - } - // Similarly, the dead state has a bunch of uninteresting - // transitions too. - if id == dead_id() { - return; - } - trans.push(format!("{} => {}", escape(byte), next.to_usize())); - }); - writeln!(f, "{:04}: {}", id, trans.join(", "))?; - - let matches: Vec = s - .matches - .iter() - .map(|&(pattern_id, _)| pattern_id.to_string()) - .collect(); - writeln!(f, " matches: {}", matches.join(", "))?; - writeln!(f, " fail: {}", s.fail.to_usize())?; - writeln!(f, " depth: {}", s.depth)?; - } - writeln!(f, "{}", "-".repeat(79))?; - writeln!(f, ")")?; - Ok(()) - } -} - -/// Iterate over all possible byte transitions given a sparse set. 
-fn sparse_iter(trans: &[(u8, S)], mut f: F) { - let mut byte = 0u16; - for &(b, id) in trans { - while byte < (b as u16) { - f(byte as u8, fail_id()); - byte += 1; - } - f(b, id); - byte += 1; - } - for b in byte..256 { - f(b as u8, fail_id()); - } -} - -/// Safely return two mutable borrows to two different locations in the given -/// slice. -/// -/// This panics if i == j. -fn get_two_mut(xs: &mut [T], i: usize, j: usize) -> (&mut T, &mut T) { - assert!(i != j, "{} must not be equal to {}", i, j); - if i < j { - let (before, after) = xs.split_at_mut(j); - (&mut before[i], &mut after[0]) - } else { - let (before, after) = xs.split_at_mut(i); - (&mut after[0], &mut before[j]) - } -} - -/// Return the given byte as its escaped string form. -fn escape(b: u8) -> String { - use std::ascii; - - String::from_utf8(ascii::escape_default(b).collect::>()).unwrap() -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn scratch() { - let nfa: NFA = Builder::new() - .dense_depth(0) - // .match_kind(MatchKind::LeftmostShortest) - // .match_kind(MatchKind::LeftmostLongest) - .match_kind(MatchKind::LeftmostFirst) - // .build(&["abcd", "ce", "b"]) - // .build(&["ab", "bc"]) - // .build(&["b", "bcd", "ce"]) - // .build(&["abc", "bx"]) - // .build(&["abc", "bd", "ab"]) - // .build(&["abcdefghi", "hz", "abcdefgh"]) - // .build(&["abcd", "bce", "b"]) - .build(&["abcdefg", "bcde", "bcdef"]) - .unwrap(); - println!("{:?}", nfa); - } -} diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/api.rs clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/api.rs --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/api.rs 2023-02-13 06:00:43.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/api.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,625 +0,0 @@ -use std::u16; - -use crate::packed::pattern::Patterns; -use crate::packed::rabinkarp::RabinKarp; -use crate::packed::teddy::{self, 
Teddy}; -use crate::Match; - -/// This is a limit placed on the total number of patterns we're willing to try -/// and match at once. As more sophisticated algorithms are added, this number -/// may be increased. -const PATTERN_LIMIT: usize = 128; - -/// A knob for controlling the match semantics of a packed multiple string -/// searcher. -/// -/// This differs from the -/// [`MatchKind`](../enum.MatchKind.html) -/// type in the top-level crate module in that it doesn't support -/// "standard" match semantics, and instead only supports leftmost-first or -/// leftmost-longest. Namely, "standard" semantics cannot be easily supported -/// by packed searchers. -/// -/// For more information on the distinction between leftmost-first and -/// leftmost-longest, see the docs on the top-level `MatchKind` type. -/// -/// Unlike the top-level `MatchKind` type, the default match semantics for this -/// type are leftmost-first. -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub enum MatchKind { - /// Use leftmost-first match semantics, which reports leftmost matches. - /// When there are multiple possible leftmost matches, the match - /// corresponding to the pattern that appeared earlier when constructing - /// the automaton is reported. - /// - /// This is the default. - LeftmostFirst, - /// Use leftmost-longest match semantics, which reports leftmost matches. - /// When there are multiple possible leftmost matches, the longest match - /// is chosen. - LeftmostLongest, - /// Hints that destructuring should not be exhaustive. - /// - /// This enum may grow additional variants, so this makes sure clients - /// don't count on exhaustive matching. (Otherwise, adding a new variant - /// could break existing code.) - #[doc(hidden)] - __Nonexhaustive, -} - -impl Default for MatchKind { - fn default() -> MatchKind { - MatchKind::LeftmostFirst - } -} - -/// The configuration for a packed multiple pattern searcher. 
-/// -/// The configuration is currently limited only to being able to select the -/// match semantics (leftmost-first or leftmost-longest) of a searcher. In the -/// future, more knobs may be made available. -/// -/// A configuration produces a [`packed::Builder`](struct.Builder.html), which -/// in turn can be used to construct a -/// [`packed::Searcher`](struct.Searcher.html) for searching. -/// -/// # Example -/// -/// This example shows how to use leftmost-longest semantics instead of the -/// default (leftmost-first). -/// -/// ``` -/// use aho_corasick::packed::{Config, MatchKind}; -/// -/// # fn example() -> Option<()> { -/// let searcher = Config::new() -/// .match_kind(MatchKind::LeftmostLongest) -/// .builder() -/// .add("foo") -/// .add("foobar") -/// .build()?; -/// let matches: Vec = searcher -/// .find_iter("foobar") -/// .map(|mat| mat.pattern()) -/// .collect(); -/// assert_eq!(vec![1], matches); -/// # Some(()) } -/// # if cfg!(target_arch = "x86_64") { -/// # example().unwrap() -/// # } else { -/// # assert!(example().is_none()); -/// # } -/// ``` -#[derive(Clone, Debug)] -pub struct Config { - kind: MatchKind, - force: Option, - force_teddy_fat: Option, - force_avx: Option, -} - -/// An internal option for forcing the use of a particular packed algorithm. -/// -/// When an algorithm is forced, if a searcher could not be constructed for it, -/// then no searcher will be returned even if an alternative algorithm would -/// work. -#[derive(Clone, Debug)] -enum ForceAlgorithm { - Teddy, - RabinKarp, -} - -impl Default for Config { - fn default() -> Config { - Config::new() - } -} - -impl Config { - /// Create a new default configuration. A default configuration uses - /// leftmost-first match semantics. - pub fn new() -> Config { - Config { - kind: MatchKind::LeftmostFirst, - force: None, - force_teddy_fat: None, - force_avx: None, - } - } - - /// Create a packed builder from this configuration. 
The builder can be - /// used to accumulate patterns and create a - /// [`Searcher`](struct.Searcher.html) - /// from them. - pub fn builder(&self) -> Builder { - Builder::from_config(self.clone()) - } - - /// Set the match semantics for this configuration. - pub fn match_kind(&mut self, kind: MatchKind) -> &mut Config { - self.kind = kind; - self - } - - /// An undocumented method for forcing the use of the Teddy algorithm. - /// - /// This is only exposed for more precise testing and benchmarks. Callers - /// should not use it as it is not part of the API stability guarantees of - /// this crate. - #[doc(hidden)] - pub fn force_teddy(&mut self, yes: bool) -> &mut Config { - if yes { - self.force = Some(ForceAlgorithm::Teddy); - } else { - self.force = None; - } - self - } - - /// An undocumented method for forcing the use of the Fat Teddy algorithm. - /// - /// This is only exposed for more precise testing and benchmarks. Callers - /// should not use it as it is not part of the API stability guarantees of - /// this crate. - #[doc(hidden)] - pub fn force_teddy_fat(&mut self, yes: Option) -> &mut Config { - self.force_teddy_fat = yes; - self - } - - /// An undocumented method for forcing the use of SSE (`Some(false)`) or - /// AVX (`Some(true)`) algorithms. - /// - /// This is only exposed for more precise testing and benchmarks. Callers - /// should not use it as it is not part of the API stability guarantees of - /// this crate. - #[doc(hidden)] - pub fn force_avx(&mut self, yes: Option) -> &mut Config { - self.force_avx = yes; - self - } - - /// An undocumented method for forcing the use of the Rabin-Karp algorithm. - /// - /// This is only exposed for more precise testing and benchmarks. Callers - /// should not use it as it is not part of the API stability guarantees of - /// this crate. 
- #[doc(hidden)] - pub fn force_rabin_karp(&mut self, yes: bool) -> &mut Config { - if yes { - self.force = Some(ForceAlgorithm::RabinKarp); - } else { - self.force = None; - } - self - } -} - -/// A builder for constructing a packed searcher from a collection of patterns. -/// -/// # Example -/// -/// This example shows how to use a builder to construct a searcher. By -/// default, leftmost-first match semantics are used. -/// -/// ``` -/// use aho_corasick::packed::{Builder, MatchKind}; -/// -/// # fn example() -> Option<()> { -/// let searcher = Builder::new() -/// .add("foobar") -/// .add("foo") -/// .build()?; -/// let matches: Vec = searcher -/// .find_iter("foobar") -/// .map(|mat| mat.pattern()) -/// .collect(); -/// assert_eq!(vec![0], matches); -/// # Some(()) } -/// # if cfg!(target_arch = "x86_64") { -/// # example().unwrap() -/// # } else { -/// # assert!(example().is_none()); -/// # } -/// ``` -#[derive(Clone, Debug)] -pub struct Builder { - /// The configuration of this builder and subsequent matcher. - config: Config, - /// Set to true if the builder detects that a matcher cannot be built. - inert: bool, - /// The patterns provided by the caller. - patterns: Patterns, -} - -impl Builder { - /// Create a new builder for constructing a multi-pattern searcher. This - /// constructor uses the default configuration. - pub fn new() -> Builder { - Builder::from_config(Config::new()) - } - - fn from_config(config: Config) -> Builder { - Builder { config, inert: false, patterns: Patterns::new() } - } - - /// Build a searcher from the patterns added to this builder so far. - pub fn build(&self) -> Option { - if self.inert || self.patterns.is_empty() { - return None; - } - let mut patterns = self.patterns.clone(); - patterns.set_match_kind(self.config.kind); - let rabinkarp = RabinKarp::new(&patterns); - // Effectively, we only want to return a searcher if we can use Teddy, - // since Teddy is our only fast packed searcher at the moment. 
- // Rabin-Karp is only used when searching haystacks smaller than what - // Teddy can support. Thus, the only way to get a Rabin-Karp searcher - // is to force it using undocumented APIs (for tests/benchmarks). - let (search_kind, minimum_len) = match self.config.force { - None | Some(ForceAlgorithm::Teddy) => { - let teddy = match self.build_teddy(&patterns) { - None => return None, - Some(teddy) => teddy, - }; - let minimum_len = teddy.minimum_len(); - (SearchKind::Teddy(teddy), minimum_len) - } - Some(ForceAlgorithm::RabinKarp) => (SearchKind::RabinKarp, 0), - }; - Some(Searcher { patterns, rabinkarp, search_kind, minimum_len }) - } - - fn build_teddy(&self, patterns: &Patterns) -> Option { - teddy::Builder::new() - .avx(self.config.force_avx) - .fat(self.config.force_teddy_fat) - .build(&patterns) - } - - /// Add the given pattern to this set to match. - /// - /// The order in which patterns are added is significant. Namely, when - /// using leftmost-first match semantics, then when multiple patterns can - /// match at a particular location, the pattern that was added first is - /// used as the match. - /// - /// If the number of patterns added exceeds the amount supported by packed - /// searchers, then the builder will stop accumulating patterns and render - /// itself inert. At this point, constructing a searcher will always return - /// `None`. - pub fn add>(&mut self, pattern: P) -> &mut Builder { - if self.inert { - return self; - } else if self.patterns.len() >= PATTERN_LIMIT { - self.inert = true; - self.patterns.reset(); - return self; - } - // Just in case PATTERN_LIMIT increases beyond u16::MAX. - assert!(self.patterns.len() <= u16::MAX as usize); - - let pattern = pattern.as_ref(); - if pattern.is_empty() { - self.inert = true; - self.patterns.reset(); - return self; - } - self.patterns.add(pattern); - self - } - - /// Add the given iterator of patterns to this set to match. 
- /// - /// The iterator must yield elements that can be converted into a `&[u8]`. - /// - /// The order in which patterns are added is significant. Namely, when - /// using leftmost-first match semantics, then when multiple patterns can - /// match at a particular location, the pattern that was added first is - /// used as the match. - /// - /// If the number of patterns added exceeds the amount supported by packed - /// searchers, then the builder will stop accumulating patterns and render - /// itself inert. At this point, constructing a searcher will always return - /// `None`. - pub fn extend(&mut self, patterns: I) -> &mut Builder - where - I: IntoIterator, - P: AsRef<[u8]>, - { - for p in patterns { - self.add(p); - } - self - } -} - -impl Default for Builder { - fn default() -> Builder { - Builder::new() - } -} - -/// A packed searcher for quickly finding occurrences of multiple patterns. -/// -/// If callers need more flexible construction, or if one wants to change the -/// match semantics (either leftmost-first or leftmost-longest), then one can -/// use the [`Config`](struct.Config.html) and/or -/// [`Builder`](struct.Builder.html) types for more fine grained control. -/// -/// # Example -/// -/// This example shows how to create a searcher from an iterator of patterns. -/// By default, leftmost-first match semantics are used. 
-/// -/// ``` -/// use aho_corasick::packed::{MatchKind, Searcher}; -/// -/// # fn example() -> Option<()> { -/// let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?; -/// let matches: Vec = searcher -/// .find_iter("foobar") -/// .map(|mat| mat.pattern()) -/// .collect(); -/// assert_eq!(vec![0], matches); -/// # Some(()) } -/// # if cfg!(target_arch = "x86_64") { -/// # example().unwrap() -/// # } else { -/// # assert!(example().is_none()); -/// # } -/// ``` -#[derive(Clone, Debug)] -pub struct Searcher { - patterns: Patterns, - rabinkarp: RabinKarp, - search_kind: SearchKind, - minimum_len: usize, -} - -#[derive(Clone, Debug)] -enum SearchKind { - Teddy(Teddy), - RabinKarp, -} - -impl Searcher { - /// A convenience function for constructing a searcher from an iterator - /// of things that can be converted to a `&[u8]`. - /// - /// If a searcher could not be constructed (either because of an - /// unsupported CPU or because there are too many patterns), then `None` - /// is returned. - /// - /// # Example - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::packed::{MatchKind, Searcher}; - /// - /// # fn example() -> Option<()> { - /// let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?; - /// let matches: Vec = searcher - /// .find_iter("foobar") - /// .map(|mat| mat.pattern()) - /// .collect(); - /// assert_eq!(vec![0], matches); - /// # Some(()) } - /// # if cfg!(target_arch = "x86_64") { - /// # example().unwrap() - /// # } else { - /// # assert!(example().is_none()); - /// # } - /// ``` - pub fn new(patterns: I) -> Option - where - I: IntoIterator, - P: AsRef<[u8]>, - { - Builder::new().extend(patterns).build() - } - - /// Return the first occurrence of any of the patterns in this searcher, - /// according to its match semantics, in the given haystack. 
The `Match` - /// returned will include the identifier of the pattern that matched, which - /// corresponds to the index of the pattern (starting from `0`) in which it - /// was added. - /// - /// # Example - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::packed::{MatchKind, Searcher}; - /// - /// # fn example() -> Option<()> { - /// let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?; - /// let mat = searcher.find("foobar")?; - /// assert_eq!(0, mat.pattern()); - /// assert_eq!(0, mat.start()); - /// assert_eq!(6, mat.end()); - /// # Some(()) } - /// # if cfg!(target_arch = "x86_64") { - /// # example().unwrap() - /// # } else { - /// # assert!(example().is_none()); - /// # } - /// ``` - pub fn find>(&self, haystack: B) -> Option { - self.find_at(haystack, 0) - } - - /// Return the first occurrence of any of the patterns in this searcher, - /// according to its match semantics, in the given haystack starting from - /// the given position. - /// - /// The `Match` returned will include the identifier of the pattern that - /// matched, which corresponds to the index of the pattern (starting from - /// `0`) in which it was added. The offsets in the `Match` will be relative - /// to the start of `haystack` (and not `at`). 
- /// - /// # Example - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::packed::{MatchKind, Searcher}; - /// - /// # fn example() -> Option<()> { - /// let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?; - /// let mat = searcher.find_at("foofoobar", 3)?; - /// assert_eq!(0, mat.pattern()); - /// assert_eq!(3, mat.start()); - /// assert_eq!(9, mat.end()); - /// # Some(()) } - /// # if cfg!(target_arch = "x86_64") { - /// # example().unwrap() - /// # } else { - /// # assert!(example().is_none()); - /// # } - /// ``` - pub fn find_at>( - &self, - haystack: B, - at: usize, - ) -> Option { - let haystack = haystack.as_ref(); - match self.search_kind { - SearchKind::Teddy(ref teddy) => { - if haystack[at..].len() < teddy.minimum_len() { - return self.slow_at(haystack, at); - } - teddy.find_at(&self.patterns, haystack, at) - } - SearchKind::RabinKarp => { - self.rabinkarp.find_at(&self.patterns, haystack, at) - } - } - } - - /// Return an iterator of non-overlapping occurrences of the patterns in - /// this searcher, according to its match semantics, in the given haystack. - /// - /// # Example - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::packed::{MatchKind, Searcher}; - /// - /// # fn example() -> Option<()> { - /// let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?; - /// let matches: Vec = searcher - /// .find_iter("foobar fooba foofoo") - /// .map(|mat| mat.pattern()) - /// .collect(); - /// assert_eq!(vec![0, 1, 1, 1], matches); - /// # Some(()) } - /// # if cfg!(target_arch = "x86_64") { - /// # example().unwrap() - /// # } else { - /// # assert!(example().is_none()); - /// # } - /// ``` - pub fn find_iter<'a, 'b, B: ?Sized + AsRef<[u8]>>( - &'a self, - haystack: &'b B, - ) -> FindIter<'a, 'b> { - FindIter { searcher: self, haystack: haystack.as_ref(), at: 0 } - } - - /// Returns the match kind used by this packed searcher. 
- /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use aho_corasick::packed::{MatchKind, Searcher}; - /// - /// # fn example() -> Option<()> { - /// let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?; - /// // leftmost-first is the default. - /// assert_eq!(&MatchKind::LeftmostFirst, searcher.match_kind()); - /// # Some(()) } - /// # if cfg!(target_arch = "x86_64") { - /// # example().unwrap() - /// # } else { - /// # assert!(example().is_none()); - /// # } - /// ``` - pub fn match_kind(&self) -> &MatchKind { - self.patterns.match_kind() - } - - /// Returns the minimum length of a haystack that is required in order for - /// packed searching to be effective. - /// - /// In some cases, the underlying packed searcher may not be able to search - /// very short haystacks. When that occurs, the implementation will defer - /// to a slower non-packed searcher (which is still generally faster than - /// Aho-Corasick for a small number of patterns). However, callers may - /// want to avoid ever using the slower variant, which one can do by - /// never passing a haystack shorter than the minimum length returned by - /// this method. - pub fn minimum_len(&self) -> usize { - self.minimum_len - } - - /// Returns the approximate total amount of heap used by this searcher, in - /// units of bytes. - pub fn heap_bytes(&self) -> usize { - self.patterns.heap_bytes() - + self.rabinkarp.heap_bytes() - + self.search_kind.heap_bytes() - } - - /// Use a slow (non-packed) searcher. - /// - /// This is useful when a packed searcher could be constructed, but could - /// not be used to search a specific haystack. For example, if Teddy was - /// built but the haystack is smaller than ~34 bytes, then Teddy might not - /// be able to run. 
- fn slow_at(&self, haystack: &[u8], at: usize) -> Option { - self.rabinkarp.find_at(&self.patterns, haystack, at) - } -} - -impl SearchKind { - fn heap_bytes(&self) -> usize { - match *self { - SearchKind::Teddy(ref ted) => ted.heap_bytes(), - SearchKind::RabinKarp => 0, - } - } -} - -/// An iterator over non-overlapping matches from a packed searcher. -/// -/// The lifetime `'s` refers to the lifetime of the underlying -/// [`Searcher`](struct.Searcher.html), while the lifetime `'h` refers to the -/// lifetime of the haystack being searched. -#[derive(Debug)] -pub struct FindIter<'s, 'h> { - searcher: &'s Searcher, - haystack: &'h [u8], - at: usize, -} - -impl<'s, 'h> Iterator for FindIter<'s, 'h> { - type Item = Match; - - fn next(&mut self) -> Option { - if self.at > self.haystack.len() { - return None; - } - match self.searcher.find_at(&self.haystack, self.at) { - None => None, - Some(c) => { - self.at = c.end; - Some(c) - } - } - } -} diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/mod.rs clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/mod.rs --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/mod.rs 2023-02-13 06:00:43.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/mod.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,117 +0,0 @@ -/*! -A lower level API for packed multiple substring search, principally for a small -number of patterns. - -This sub-module provides vectorized routines for quickly finding matches of a -small number of patterns. In general, users of this crate shouldn't need to -interface with this module directly, as the primary -[`AhoCorasick`](../struct.AhoCorasick.html) -searcher will use these routines automatically as a prefilter when applicable. -However, in some cases, callers may want to bypass the Aho-Corasick machinery -entirely and use this vectorized searcher directly. 
- -# Overview - -The primary types in this sub-module are: - -* [`Searcher`](struct.Searcher.html) executes the actual search algorithm to - report matches in a haystack. -* [`Builder`](struct.Builder.html) accumulates patterns incrementally and can - construct a `Searcher`. -* [`Config`](struct.Config.html) permits tuning the searcher, and itself will - produce a `Builder` (which can then be used to build a `Searcher`). - Currently, the only tuneable knob are the match semantics, but this may be - expanded in the future. - -# Examples - -This example shows how to create a searcher from an iterator of patterns. -By default, leftmost-first match semantics are used. (See the top-level -[`MatchKind`](../enum.MatchKind.html) type for more details about match -semantics, which apply similarly to packed substring search.) - -``` -use aho_corasick::packed::{MatchKind, Searcher}; - -# fn example() -> Option<()> { -let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?; -let matches: Vec = searcher - .find_iter("foobar") - .map(|mat| mat.pattern()) - .collect(); -assert_eq!(vec![0], matches); -# Some(()) } -# if cfg!(target_arch = "x86_64") { -# example().unwrap() -# } else { -# assert!(example().is_none()); -# } -``` - -This example shows how to use [`Config`](struct.Config.html) to change the -match semantics to leftmost-longest: - -``` -use aho_corasick::packed::{Config, MatchKind}; - -# fn example() -> Option<()> { -let searcher = Config::new() - .match_kind(MatchKind::LeftmostLongest) - .builder() - .add("foo") - .add("foobar") - .build()?; -let matches: Vec = searcher - .find_iter("foobar") - .map(|mat| mat.pattern()) - .collect(); -assert_eq!(vec![1], matches); -# Some(()) } -# if cfg!(target_arch = "x86_64") { -# example().unwrap() -# } else { -# assert!(example().is_none()); -# } -``` - -# Packed substring searching - -Packed substring searching refers to the use of SIMD (Single Instruction, -Multiple Data) to accelerate the detection of matches in a 
haystack. Unlike -conventional algorithms, such as Aho-Corasick, SIMD algorithms for substring -search tend to do better with a small number of patterns, where as Aho-Corasick -generally maintains reasonably consistent performance regardless of the number -of patterns you give it. Because of this, the vectorized searcher in this -sub-module cannot be used as a general purpose searcher, since building the -searcher may fail. However, in exchange, when searching for a small number of -patterns, searching can be quite a bit faster than Aho-Corasick (sometimes by -an order of magnitude). - -The key take away here is that constructing a searcher from a list of patterns -is a fallible operation. While the precise conditions under which building a -searcher can fail is specifically an implementation detail, here are some -common reasons: - -* Too many patterns were given. Typically, the limit is on the order of 100 or - so, but this limit may fluctuate based on available CPU features. -* The available packed algorithms require CPU features that aren't available. - For example, currently, this crate only provides packed algorithms for - `x86_64`. Therefore, constructing a packed searcher on any other target - (e.g., ARM) will always fail. -* Zero patterns were given, or one of the patterns given was empty. Packed - searchers require at least one pattern and that all patterns are non-empty. -* Something else about the nature of the patterns (typically based on - heuristics) suggests that a packed searcher would perform very poorly, so - no searcher is built. 
-*/ - -pub use crate::packed::api::{Builder, Config, FindIter, MatchKind, Searcher}; - -mod api; -mod pattern; -mod rabinkarp; -mod teddy; -#[cfg(test)] -mod tests; -#[cfg(target_arch = "x86_64")] -mod vector; diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/pattern.rs clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/pattern.rs --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/pattern.rs 2023-02-13 06:00:43.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/pattern.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,318 +0,0 @@ -use std::cmp; -use std::fmt; -use std::mem; -use std::u16; -use std::usize; - -use crate::packed::api::MatchKind; - -/// The type used for representing a pattern identifier. -/// -/// We don't use `usize` here because our packed searchers don't scale to -/// huge numbers of patterns, so we keep things a bit smaller. -pub type PatternID = u16; - -/// A non-empty collection of non-empty patterns to search for. -/// -/// This collection of patterns is what is passed around to both execute -/// searches and to construct the searchers themselves. Namely, this permits -/// searches to avoid copying all of the patterns, and allows us to keep only -/// one copy throughout all packed searchers. -/// -/// Note that this collection is not a set. The same pattern can appear more -/// than once. -#[derive(Clone, Debug)] -pub struct Patterns { - /// The match semantics supported by this collection of patterns. - /// - /// The match semantics determines the order of the iterator over patterns. - /// For leftmost-first, patterns are provided in the same order as were - /// provided by the caller. For leftmost-longest, patterns are provided in - /// descending order of length, with ties broken by the order in which they - /// were provided by the caller. - kind: MatchKind, - /// The collection of patterns, indexed by their identifier. 
- by_id: Vec>, - /// The order of patterns defined for iteration, given by pattern - /// identifiers. The order of `by_id` and `order` is always the same for - /// leftmost-first semantics, but may be different for leftmost-longest - /// semantics. - order: Vec, - /// The length of the smallest pattern, in bytes. - minimum_len: usize, - /// The largest pattern identifier. This should always be equivalent to - /// the number of patterns minus one in this collection. - max_pattern_id: PatternID, - /// The total number of pattern bytes across the entire collection. This - /// is used for reporting total heap usage in constant time. - total_pattern_bytes: usize, -} - -impl Patterns { - /// Create a new collection of patterns for the given match semantics. The - /// ID of each pattern is the index of the pattern at which it occurs in - /// the `by_id` slice. - /// - /// If any of the patterns in the slice given are empty, then this panics. - /// Similarly, if the number of patterns given is zero, then this also - /// panics. - pub fn new() -> Patterns { - Patterns { - kind: MatchKind::default(), - by_id: vec![], - order: vec![], - minimum_len: usize::MAX, - max_pattern_id: 0, - total_pattern_bytes: 0, - } - } - - /// Add a pattern to this collection. - /// - /// This panics if the pattern given is empty. - pub fn add(&mut self, bytes: &[u8]) { - assert!(!bytes.is_empty()); - assert!(self.by_id.len() <= u16::MAX as usize); - - let id = self.by_id.len() as u16; - self.max_pattern_id = id; - self.order.push(id); - self.by_id.push(bytes.to_vec()); - self.minimum_len = cmp::min(self.minimum_len, bytes.len()); - self.total_pattern_bytes += bytes.len(); - } - - /// Set the match kind semantics for this collection of patterns. - /// - /// If the kind is not set, then the default is leftmost-first. 
- pub fn set_match_kind(&mut self, kind: MatchKind) { - match kind { - MatchKind::LeftmostFirst => { - self.order.sort(); - } - MatchKind::LeftmostLongest => { - let (order, by_id) = (&mut self.order, &mut self.by_id); - order.sort_by(|&id1, &id2| { - by_id[id1 as usize] - .len() - .cmp(&by_id[id2 as usize].len()) - .reverse() - }); - } - MatchKind::__Nonexhaustive => unreachable!(), - } - } - - /// Return the number of patterns in this collection. - /// - /// This is guaranteed to be greater than zero. - pub fn len(&self) -> usize { - self.by_id.len() - } - - /// Returns true if and only if this collection of patterns is empty. - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - /// Returns the approximate total amount of heap used by these patterns, in - /// units of bytes. - pub fn heap_bytes(&self) -> usize { - self.order.len() * mem::size_of::() - + self.by_id.len() * mem::size_of::>() - + self.total_pattern_bytes - } - - /// Clears all heap memory associated with this collection of patterns and - /// resets all state such that it is a valid empty collection. - pub fn reset(&mut self) { - self.kind = MatchKind::default(); - self.by_id.clear(); - self.order.clear(); - self.minimum_len = usize::MAX; - self.max_pattern_id = 0; - } - - /// Return the maximum pattern identifier in this collection. This can be - /// useful in searchers for ensuring that the collection of patterns they - /// are provided at search time and at build time have the same size. - pub fn max_pattern_id(&self) -> PatternID { - assert_eq!((self.max_pattern_id + 1) as usize, self.len()); - self.max_pattern_id - } - - /// Returns the length, in bytes, of the smallest pattern. - /// - /// This is guaranteed to be at least one. - pub fn minimum_len(&self) -> usize { - self.minimum_len - } - - /// Returns the match semantics used by these patterns. - pub fn match_kind(&self) -> &MatchKind { - &self.kind - } - - /// Return the pattern with the given identifier. 
If such a pattern does - /// not exist, then this panics. - pub fn get(&self, id: PatternID) -> Pattern<'_> { - Pattern(&self.by_id[id as usize]) - } - - /// Return the pattern with the given identifier without performing bounds - /// checks. - /// - /// # Safety - /// - /// Callers must ensure that a pattern with the given identifier exists - /// before using this method. - #[cfg(target_arch = "x86_64")] - pub unsafe fn get_unchecked(&self, id: PatternID) -> Pattern<'_> { - Pattern(self.by_id.get_unchecked(id as usize)) - } - - /// Return an iterator over all the patterns in this collection, in the - /// order in which they should be matched. - /// - /// Specifically, in a naive multi-pattern matcher, the following is - /// guaranteed to satisfy the match semantics of this collection of - /// patterns: - /// - /// ```ignore - /// for i in 0..haystack.len(): - /// for p in patterns.iter(): - /// if haystack[i..].starts_with(p.bytes()): - /// return Match(p.id(), i, i + p.bytes().len()) - /// ``` - /// - /// Namely, among the patterns in a collection, if they are matched in - /// the order provided by this iterator, then the result is guaranteed - /// to satisfy the correct match semantics. (Either leftmost-first or - /// leftmost-longest.) - pub fn iter(&self) -> PatternIter<'_> { - PatternIter { patterns: self, i: 0 } - } -} - -/// An iterator over the patterns in the `Patterns` collection. -/// -/// The order of the patterns provided by this iterator is consistent with the -/// match semantics of the originating collection of patterns. -/// -/// The lifetime `'p` corresponds to the lifetime of the collection of patterns -/// this is iterating over. 
-#[derive(Debug)] -pub struct PatternIter<'p> { - patterns: &'p Patterns, - i: usize, -} - -impl<'p> Iterator for PatternIter<'p> { - type Item = (PatternID, Pattern<'p>); - - fn next(&mut self) -> Option<(PatternID, Pattern<'p>)> { - if self.i >= self.patterns.len() { - return None; - } - let id = self.patterns.order[self.i]; - let p = self.patterns.get(id); - self.i += 1; - Some((id, p)) - } -} - -/// A pattern that is used in packed searching. -#[derive(Clone)] -pub struct Pattern<'a>(&'a [u8]); - -impl<'a> fmt::Debug for Pattern<'a> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("Pattern") - .field("lit", &String::from_utf8_lossy(&self.0)) - .finish() - } -} - -impl<'p> Pattern<'p> { - /// Returns the length of this pattern, in bytes. - pub fn len(&self) -> usize { - self.0.len() - } - - /// Returns the bytes of this pattern. - pub fn bytes(&self) -> &[u8] { - &self.0 - } - - /// Returns the first `len` low nybbles from this pattern. If this pattern - /// is shorter than `len`, then this panics. - #[cfg(target_arch = "x86_64")] - pub fn low_nybbles(&self, len: usize) -> Vec { - let mut nybs = vec![]; - for &b in self.bytes().iter().take(len) { - nybs.push(b & 0xF); - } - nybs - } - - /// Returns true if this pattern is a prefix of the given bytes. - #[inline(always)] - pub fn is_prefix(&self, bytes: &[u8]) -> bool { - self.len() <= bytes.len() && self.equals(&bytes[..self.len()]) - } - - /// Returns true if and only if this pattern equals the given bytes. - #[inline(always)] - pub fn equals(&self, bytes: &[u8]) -> bool { - // Why not just use memcmp for this? Well, memcmp requires calling out - // to libc, and this routine is called in fairly hot code paths. Other - // than just calling out to libc, it also seems to result in worse - // codegen. By rolling our own memcpy in pure Rust, it seems to appear - // more friendly to the optimizer. - // - // This results in an improvement in just about every benchmark. 
Some - // smaller than others, but in some cases, up to 30% faster. - - if self.len() != bytes.len() { - return false; - } - if self.len() < 8 { - for (&b1, &b2) in self.bytes().iter().zip(bytes) { - if b1 != b2 { - return false; - } - } - return true; - } - // When we have 8 or more bytes to compare, then proceed in chunks of - // 8 at a time using unaligned loads. - let mut p1 = self.bytes().as_ptr(); - let mut p2 = bytes.as_ptr(); - let p1end = self.bytes()[self.len() - 8..].as_ptr(); - let p2end = bytes[bytes.len() - 8..].as_ptr(); - // SAFETY: Via the conditional above, we know that both `p1` and `p2` - // have the same length, so `p1 < p1end` implies that `p2 < p2end`. - // Thus, derefencing both `p1` and `p2` in the loop below is safe. - // - // Moreover, we set `p1end` and `p2end` to be 8 bytes before the actual - // end of of `p1` and `p2`. Thus, the final dereference outside of the - // loop is guaranteed to be valid. - // - // Finally, we needn't worry about 64-bit alignment here, since we - // do unaligned loads. - unsafe { - while p1 < p1end { - let v1 = (p1 as *const u64).read_unaligned(); - let v2 = (p2 as *const u64).read_unaligned(); - if v1 != v2 { - return false; - } - p1 = p1.add(8); - p2 = p2.add(8); - } - let v1 = (p1end as *const u64).read_unaligned(); - let v2 = (p2end as *const u64).read_unaligned(); - v1 == v2 - } - } -} diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/rabinkarp.rs clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/rabinkarp.rs --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/rabinkarp.rs 2023-02-13 06:00:43.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/rabinkarp.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,185 +0,0 @@ -use std::mem; - -use crate::packed::pattern::{PatternID, Patterns}; -use crate::Match; - -/// The type of the rolling hash used in the Rabin-Karp algorithm. 
-type Hash = usize; - -/// The number of buckets to store our patterns in. We don't want this to be -/// too big in order to avoid wasting memory, but we don't want it to be too -/// small either to avoid spending too much time confirming literals. -/// -/// The number of buckets MUST be a power of two. Otherwise, determining the -/// bucket from a hash will slow down the code considerably. Using a power -/// of two means `hash % NUM_BUCKETS` can compile down to a simple `and` -/// instruction. -const NUM_BUCKETS: usize = 64; - -/// An implementation of the Rabin-Karp algorithm. The main idea of this -/// algorithm is to maintain a rolling hash as it moves through the input, and -/// then check whether that hash corresponds to the same hash for any of the -/// patterns we're looking for. -/// -/// A draw back of naively scaling Rabin-Karp to multiple patterns is that -/// it requires all of the patterns to be the same length, which in turn -/// corresponds to the number of bytes to hash. We adapt this to work for -/// multiple patterns of varying size by fixing the number of bytes to hash -/// to be the length of the smallest pattern. We also split the patterns into -/// several buckets to hopefully make the confirmation step faster. -/// -/// Wikipedia has a decent explanation, if a bit heavy on the theory: -/// https://en.wikipedia.org/wiki/Rabin%E2%80%93Karp_algorithm -/// -/// But ESMAJ provides something a bit more concrete: -/// https://www-igm.univ-mlv.fr/~lecroq/string/node5.html -#[derive(Clone, Debug)] -pub struct RabinKarp { - /// The order of patterns in each bucket is significant. Namely, they are - /// arranged such that the first one to match is the correct match. This - /// may not necessarily correspond to the order provided by the caller. - /// For example, if leftmost-longest semantics are used, then the patterns - /// are sorted by their length in descending order. 
If leftmost-first - /// semantics are used, then the patterns are sorted by their pattern ID - /// in ascending order (which corresponds to the caller's order). - buckets: Vec>, - /// The length of the hashing window. Generally, this corresponds to the - /// length of the smallest pattern. - hash_len: usize, - /// The factor to subtract out of a hash before updating it with a new - /// byte. - hash_2pow: usize, - /// The maximum identifier of a pattern. This is used as a sanity check - /// to ensure that the patterns provided by the caller are the same as - /// the patterns that were used to compile the matcher. This sanity check - /// possibly permits safely eliminating bounds checks regardless of what - /// patterns are provided by the caller. - /// - /// (Currently, we don't use this to elide bounds checks since it doesn't - /// result in a measurable performance improvement, but we do use it for - /// better failure modes.) - max_pattern_id: PatternID, -} - -impl RabinKarp { - /// Compile a new Rabin-Karp matcher from the patterns given. - /// - /// This panics if any of the patterns in the collection are empty, or if - /// the collection is itself empty. - pub fn new(patterns: &Patterns) -> RabinKarp { - assert!(patterns.len() >= 1); - let hash_len = patterns.minimum_len(); - assert!(hash_len >= 1); - - let mut hash_2pow = 1usize; - for _ in 1..hash_len { - hash_2pow = hash_2pow.wrapping_shl(1); - } - - let mut rk = RabinKarp { - buckets: vec![vec![]; NUM_BUCKETS], - hash_len, - hash_2pow, - max_pattern_id: patterns.max_pattern_id(), - }; - for (id, pat) in patterns.iter() { - let hash = rk.hash(&pat.bytes()[..rk.hash_len]); - let bucket = hash % NUM_BUCKETS; - rk.buckets[bucket].push((hash, id)); - } - rk - } - - /// Return the first matching pattern in the given haystack, begining the - /// search at `at`. 
- pub fn find_at( - &self, - patterns: &Patterns, - haystack: &[u8], - mut at: usize, - ) -> Option { - assert_eq!(NUM_BUCKETS, self.buckets.len()); - assert_eq!( - self.max_pattern_id, - patterns.max_pattern_id(), - "Rabin-Karp must be called with same patterns it was built with", - ); - - if at + self.hash_len > haystack.len() { - return None; - } - let mut hash = self.hash(&haystack[at..at + self.hash_len]); - loop { - let bucket = &self.buckets[hash % NUM_BUCKETS]; - for &(phash, pid) in bucket { - if phash == hash { - if let Some(c) = self.verify(patterns, pid, haystack, at) { - return Some(c); - } - } - } - if at + self.hash_len >= haystack.len() { - return None; - } - hash = self.update_hash( - hash, - haystack[at], - haystack[at + self.hash_len], - ); - at += 1; - } - } - - /// Returns the approximate total amount of heap used by this searcher, in - /// units of bytes. - pub fn heap_bytes(&self) -> usize { - let num_patterns = self.max_pattern_id as usize + 1; - self.buckets.len() * mem::size_of::>() - + num_patterns * mem::size_of::<(Hash, PatternID)>() - } - - /// Verify whether the pattern with the given id matches at - /// `haystack[at..]`. - /// - /// We tag this function as `cold` because it helps improve codegen. - /// Intuitively, it would seem like inlining it would be better. However, - /// the only time this is called and a match is not found is when there - /// there is a hash collision, or when a prefix of a pattern matches but - /// the entire pattern doesn't match. This is hopefully fairly rare, and - /// if it does occur a lot, it's going to be slow no matter what we do. - #[cold] - fn verify( - &self, - patterns: &Patterns, - id: PatternID, - haystack: &[u8], - at: usize, - ) -> Option { - let pat = patterns.get(id); - if pat.is_prefix(&haystack[at..]) { - Some(Match::from_span(id as usize, at, at + pat.len())) - } else { - None - } - } - - /// Hash the given bytes. 
- fn hash(&self, bytes: &[u8]) -> Hash { - assert_eq!(self.hash_len, bytes.len()); - - let mut hash = 0usize; - for &b in bytes { - hash = hash.wrapping_shl(1).wrapping_add(b as usize); - } - hash - } - - /// Update the hash given based on removing `old_byte` at the beginning - /// of some byte string, and appending `new_byte` to the end of that same - /// byte string. - fn update_hash(&self, prev: Hash, old_byte: u8, new_byte: u8) -> Hash { - prev.wrapping_sub((old_byte as usize).wrapping_mul(self.hash_2pow)) - .wrapping_shl(1) - .wrapping_add(new_byte as usize) - } -} diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/teddy/README.md clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/teddy/README.md --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/teddy/README.md 2023-02-13 06:00:43.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/teddy/README.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,386 +0,0 @@ -Teddy is a SIMD accelerated multiple substring matching algorithm. The name -and the core ideas in the algorithm were learned from the [Hyperscan][1_u] -project. The implementation in this repository was mostly motivated for use in -accelerating regex searches by searching for small sets of required literals -extracted from the regex. - - -# Background - -The key idea of Teddy is to do *packed* substring matching. In the literature, -packed substring matching is the idea of examining multiple bytes in a haystack -at a time to detect matches. Implementations of, for example, memchr (which -detects matches of a single byte) have been doing this for years. Only -recently, with the introduction of various SIMD instructions, has this been -extended to substring matching. The PCMPESTRI instruction (and its relatives), -for example, implements substring matching in hardware. 
It is, however, limited -to substrings of length 16 bytes or fewer, but this restriction is fine in a -regex engine, since we rarely care about the performance difference between -searching for a 16 byte literal and a 16 + N literal; 16 is already long -enough. The key downside of the PCMPESTRI instruction, on current (2016) CPUs -at least, is its latency and throughput. As a result, it is often faster to -do substring search with a Boyer-Moore (or Two-Way) variant and a well placed -memchr to quickly skip through the haystack. - -There are fewer results from the literature on packed substring matching, -and even fewer for packed multiple substring matching. Ben-Kiki et al. [2] -describes use of PCMPESTRI for substring matching, but is mostly theoretical -and hand-waves performance. There is other theoretical work done by Bille [3] -as well. - -The rest of the work in the field, as far as I'm aware, is by Faro and Kulekci -and is generally focused on multiple pattern search. Their first paper [4a] -introduces the concept of a fingerprint, which is computed for every block of -N bytes in every pattern. The haystack is then scanned N bytes at a time and -a fingerprint is computed in the same way it was computed for blocks in the -patterns. If the fingerprint corresponds to one that was found in a pattern, -then a verification step follows to confirm that one of the substrings with the -corresponding fingerprint actually matches at the current location. Various -implementation tricks are employed to make sure the fingerprint lookup is fast; -typically by truncating the fingerprint. (This may, of course, provoke more -steps in the verification process, so a balance must be struck.) - -The main downside of [4a] is that the minimum substring length is 32 bytes, -presumably because of how the algorithm uses certain SIMD instructions. This -essentially makes it useless for general purpose regex matching, where a small -number of short patterns is far more likely. 
- -Faro and Kulekci published another paper [4b] that is conceptually very similar -to [4a]. The key difference is that it uses the CRC32 instruction (introduced -as part of SSE 4.2) to compute fingerprint values. This also enables the -algorithm to work effectively on substrings as short as 7 bytes with 4 byte -windows. 7 bytes is unfortunately still too long. The window could be -technically shrunk to 2 bytes, thereby reducing minimum length to 3, but the -small window size ends up negating most performance benefits—and it's likely -the common case in a general purpose regex engine. - -Faro and Kulekci also published [4c] that appears to be intended as a -replacement to using PCMPESTRI. In particular, it is specifically motivated by -the high throughput/latency time of PCMPESTRI and therefore chooses other SIMD -instructions that are faster. While this approach works for short substrings, -I personally couldn't see a way to generalize it to multiple substring search. - -Faro and Kulekci have another paper [4d] that I haven't been able to read -because it is behind a paywall. - - -# Teddy - -Finally, we get to Teddy. If the above literature review is complete, then it -appears that Teddy is a novel algorithm. More than that, in my experience, it -completely blows away the competition for short substrings, which is exactly -what we want in a general purpose regex engine. Again, the algorithm appears -to be developed by the authors of [Hyperscan][1_u]. Hyperscan was open sourced -late 2015, and no earlier history could be found. Therefore, tracking the exact -provenance of the algorithm with respect to the published literature seems -difficult. - -At a high level, Teddy works somewhat similarly to the fingerprint algorithms -published by Faro and Kulekci, but Teddy does it in a way that scales a bit -better. Namely: - -1. Teddy's core algorithm scans the haystack in 16 (for SSE, or 32 for AVX) - byte chunks. 
16 (or 32) is significant because it corresponds to the number - of bytes in a SIMD vector. -2. Bitwise operations are performed on each chunk to discover if any region of - it matches a set of precomputed fingerprints from the patterns. If there are - matches, then a verification step is performed. In this implementation, our - verification step is naive. This can be improved upon. - -The details to make this work are quite clever. First, we must choose how to -pick our fingerprints. In Hyperscan's implementation, I *believe* they use the -last N bytes of each substring, where N must be at least the minimum length of -any substring in the set being searched. In this implementation, we use the -first N bytes of each substring. (The tradeoffs between these choices aren't -yet clear to me.) We then must figure out how to quickly test whether an -occurrence of any fingerprint from the set of patterns appears in a 16 byte -block from the haystack. To keep things simple, let's assume N = 1 and examine -some examples to motivate the approach. Here are our patterns: - -```ignore -foo -bar -baz -``` - -The corresponding fingerprints, for N = 1, are `f`, `b` and `b`. Now let's set -our 16 byte block to: - -```ignore -bat cat foo bump -xxxxxxxxxxxxxxxx -``` - -To cut to the chase, Teddy works by using bitsets. In particular, Teddy creates -a mask that allows us to quickly compute membership of a fingerprint in a 16 -byte block that also tells which pattern the fingerprint corresponds to. In -this case, our fingerprint is a single byte, so an appropriate abstraction is -a map from a single byte to a list of patterns that contain that fingerprint: - -```ignore -f |--> foo -b |--> bar, baz -``` - -Now, all we need to do is figure out how to represent this map in vector space -and use normal SIMD operations to perform a lookup. The first simplification -we can make is to represent our patterns as bit fields occupying a single -byte. 
This is important, because a single SIMD vector can store 16 bytes. - -```ignore -f |--> 00000001 -b |--> 00000010, 00000100 -``` - -How do we perform lookup though? It turns out that SSSE3 introduced a very cool -instruction called PSHUFB. The instruction takes two SIMD vectors, `A` and `B`, -and returns a third vector `C`. All vectors are treated as 16 8-bit integers. -`C` is formed by `C[i] = A[B[i]]`. (This is a bit of a simplification, but true -for the purposes of this algorithm. For full details, see [Intel's Intrinsics -Guide][5_u].) This essentially lets us use the values in `B` to lookup values -in `A`. - -If we could somehow cause `B` to contain our 16 byte block from the haystack, -and if `A` could contain our bitmasks, then we'd end up with something like -this for `A`: - -```ignore - 0x00 0x01 ... 0x62 ... 0x66 ... 0xFF -A = 0 0 00000110 00000001 0 -``` - -And if `B` contains our window from our haystack, we could use shuffle to take -the values from `B` and use them to look up our bitsets in `A`. But of course, -we can't do this because `A` in the above example contains 256 bytes, which -is much larger than the size of a SIMD vector. - -Nybbles to the rescue! A nybble is 4 bits. Instead of one mask to hold all of -our bitsets, we can use two masks, where one mask corresponds to the lower four -bits of our fingerprint and the other mask corresponds to the upper four bits. -So our map now looks like: - -```ignore -'f' & 0xF = 0x6 |--> 00000001 -'f' >> 4 = 0x6 |--> 00000111 -'b' & 0xF = 0x2 |--> 00000110 -'b' >> 4 = 0x6 |--> 00000111 -``` - -Notice that the bitsets for each nybble correspond to the union of all -fingerprints that contain that nybble. For example, both `f` and `b` have the -same upper 4 bits but differ on the lower 4 bits. 
Putting this together, we -have `A0`, `A1` and `B`, where `A0` is our mask for the lower nybble, `A1` is -our mask for the upper nybble and `B` is our 16 byte block from the haystack: - -```ignore - 0x00 0x01 0x02 0x03 ... 0x06 ... 0xF -A0 = 0 0 00000110 0 00000001 0 -A1 = 0 0 0 0 00000111 0 -B = b a t _ t p -B = 0x62 0x61 0x74 0x20 0x74 0x70 -``` - -But of course, we can't use `B` with `PSHUFB` yet, since its values are 8 bits, -and we need indexes that are at most 4 bits (corresponding to one of 16 -values). We can apply the same transformation to split `B` into lower and upper -nybbles as we did `A`. As before, `B0` corresponds to the lower nybbles and -`B1` corresponds to the upper nybbles: - -```ignore - b a t _ c a t _ f o o _ b u m p -B0 = 0x2 0x1 0x4 0x0 0x3 0x1 0x4 0x0 0x6 0xF 0xF 0x0 0x2 0x5 0xD 0x0 -B1 = 0x6 0x6 0x7 0x2 0x6 0x6 0x7 0x2 0x6 0x6 0x6 0x2 0x6 0x7 0x6 0x7 -``` - -And now we have a nice correspondence. `B0` can index `A0` and `B1` can index -`A1`. Here's what we get when we apply `C0 = PSHUFB(A0, B0)`: - -```ignore - b a ... f o ... p - A0[0x2] A0[0x1] A0[0x6] A0[0xF] A0[0x0] -C0 = 00000110 0 00000001 0 0 -``` - -And `C1 = PSHUFB(A1, B1)`: - -```ignore - b a ... f o ... p - A1[0x6] A1[0x6] A1[0x6] A1[0x6] A1[0x7] -C1 = 00000111 00000111 00000111 00000111 0 -``` - -Notice how neither one of `C0` or `C1` is guaranteed to report fully correct -results all on its own. For example, `C1` claims that `b` is a fingerprint for -the pattern `foo` (since `A1[0x6] = 00000111`), and that `o` is a fingerprint -for all of our patterns. But if we combined `C0` and `C1` with an `AND` -operation: - -```ignore - b a ... f o ... p -C = 00000110 0 00000001 0 0 -``` - -Then we now have that `C[i]` contains a bitset corresponding to the matching -fingerprints in a haystack's 16 byte block, where `i` is the `ith` byte in that -block. - -Once we have that, we can look for the position of the least significant bit -in `C`. 
(Least significant because we only target `x86_64` here, which is -always little endian. Thus, the least significant bytes correspond to bytes -in our haystack at a lower address.) That position, modulo `8`, gives us -the pattern that the fingerprint matches. That position, integer divided by -`8`, also gives us the byte offset that the fingerprint occurs in inside the -16 byte haystack block. Using those two pieces of information, we can run a -verification procedure that tries to match all substrings containing that -fingerprint at that position in the haystack. - - -# Implementation notes - -The problem with the algorithm as described above is that it uses a single byte -for a fingerprint. This will work well if the fingerprints are rare in the -haystack (e.g., capital letters or special characters in normal English text), -but if the fingerprints are common, you'll wind up spending too much time in -the verification step, which effectively negates the performance benefits of -scanning 16 bytes at a time. Remember, the key to the performance of this -algorithm is to do as little work as possible per 16 (or 32) bytes. - -This algorithm can be extrapolated in a relatively straight-forward way to use -larger fingerprints. That is, instead of a single byte prefix, we might use a -two or three byte prefix. The implementation here implements N = {1, 2, 3} -and always picks the largest N possible. The rationale is that the bigger the -fingerprint, the fewer verification steps we'll do. Of course, if N is too -large, then we'll end up doing too much on each step. - -The way to extend it is: - -1. Add a mask for each byte in the fingerprint. (Remember that each mask is - composed of two SIMD vectors.) This results in a value of `C` for each byte - in the fingerprint while searching. -2. When testing each 16 (or 32) byte block, each value of `C` must be shifted - so that they are aligned. Once aligned, they should all be `AND`'d together. 
- This will give you only the bitsets corresponding to the full match of the - fingerprint. To do this, one needs to save the last byte (for N=2) or last - two bytes (for N=3) from the previous iteration, and then line them up with - the first one or two bytes of the next iteration. - -## Verification - -Verification generally follows the procedure outlined above. The tricky parts -are in the right formulation of operations to get our bits out of our vectors. -We have a limited set of operations available to us on SIMD vectors as 128-bit -or 256-bit numbers, so we wind up needing to rip out 2 (or 4) 64-bit integers -from our vectors, and then run our verification step on each of those. The -verification step looks at the least significant bit set, and from its -position, we can derive the byte offset and bucket. (Again, as described -above.) Once we know the bucket, we do a fairly naive exhaustive search for -every literal in that bucket. (Hyperscan is a bit smarter here and uses a hash -table, but I haven't had time to thoroughly explore that. A few initial -half-hearted attempts resulted in worse performance.) - -## AVX - -The AVX version of Teddy extrapolates almost perfectly from the SSE version. -The only hickup is that PALIGNR is used to align chunks in the 16-bit version, -and there is no equivalent instruction in AVX. AVX does have VPALIGNR, but it -only works within 128-bit lanes. So there's a bit of tomfoolery to get around -this by shuffling the vectors before calling VPALIGNR. - -The only other aspect to AVX is that since our masks are still fundamentally -16-bytes (0x0-0xF), they are duplicated to 32-bytes, so that they can apply to -32-byte chunks. - -## Fat Teddy - -In the version of Teddy described above, 8 buckets are used to group patterns -that we want to search for. 
However, when AVX is available, we can extend the -number of buckets to 16 by permitting each byte in our masks to use 16-bits -instead of 8-bits to represent the buckets it belongs to. (This variant is also -in Hyperscan.) However, what we give up is the ability to scan 32 bytes at a -time, even though we're using AVX. Instead, we have to scan 16 bytes at a time. -What we gain, though, is (hopefully) less work in our verification routine. -It patterns are more spread out across more buckets, then there should overall -be fewer false positives. In general, Fat Teddy permits us to grow our capacity -a bit and search for more literals before Teddy gets overwhelmed. - -The tricky part of Fat Teddy is in how we adjust our masks and our verification -procedure. For the masks, we simply represent the first 8 buckets in each of -the low 16 bytes, and then the second 8 buckets in each of the high 16 bytes. -Then, in the search loop, instead of loading 32 bytes from the haystack, we -load the same 16 bytes from the haystack into both the low and high 16 byte -portions of our 256-bit vector. So for example, a mask might look like this: - - bits: 00100001 00000000 ... 11000000 00000000 00000001 ... 00000000 - byte: 31 30 16 15 14 0 - offset: 15 14 0 15 14 0 - buckets: 8-15 8-15 8-15 0-7 0-7 0-7 - -Where `byte` is the position in the vector (higher numbers corresponding to -more significant bits), `offset` is the corresponding position in the haystack -chunk, and `buckets` corresponds to the bucket assignments for that particular -byte. - -In particular, notice that the bucket assignments for offset `0` are spread -out between bytes `0` and `16`. This works well for the chunk-by-chunk search -procedure, but verification really wants to process all bucket assignments for -each offset at once. Otherwise, we might wind up finding a match at offset -`1` in one the first 8 buckets, when we really should have reported a match -at offset `0` in one of the second 8 buckets. 
(Because we want the leftmost -match.) - -Thus, for verification, we rearrange the above vector such that it is a -sequence of 16-bit integers, where the least significant 16-bit integer -corresponds to all of the bucket assignments for offset `0`. So with the -above vector, the least significant 16-bit integer would be - - 11000000 000000 - -which was taken from bytes `16` and `0`. Then the verification step pretty much -runs as described, except with 16 buckets instead of 8. - - -# References - -- **[1]** [Hyperscan on GitHub](https://github.com/intel/hyperscan), - [webpage](https://www.hyperscan.io/) -- **[2a]** Ben-Kiki, O., Bille, P., Breslauer, D., Gasieniec, L., Grossi, R., - & Weimann, O. (2011). - _Optimal packed string matching_. - In LIPIcs-Leibniz International Proceedings in Informatics (Vol. 13). - Schloss Dagstuhl-Leibniz-Zentrum fuer Informatik. - DOI: 10.4230/LIPIcs.FSTTCS.2011.423. - [PDF](https://drops.dagstuhl.de/opus/volltexte/2011/3355/pdf/37.pdf). -- **[2b]** Ben-Kiki, O., Bille, P., Breslauer, D., Ga̧sieniec, L., Grossi, R., - & Weimann, O. (2014). - _Towards optimal packed string matching_. - Theoretical Computer Science, 525, 111-129. - DOI: 10.1016/j.tcs.2013.06.013. - [PDF](https://www.cs.haifa.ac.il/~oren/Publications/bpsm.pdf). -- **[3]** Bille, P. (2011). - _Fast searching in packed strings_. - Journal of Discrete Algorithms, 9(1), 49-56. - DOI: 10.1016/j.jda.2010.09.003. - [PDF](https://www.sciencedirect.com/science/article/pii/S1570866710000353). -- **[4a]** Faro, S., & Külekci, M. O. (2012, October). - _Fast multiple string matching using streaming SIMD extensions technology_. - In String Processing and Information Retrieval (pp. 217-228). - Springer Berlin Heidelberg. - DOI: 10.1007/978-3-642-34109-0_23. - [PDF](https://www.dmi.unict.it/faro/papers/conference/faro32.pdf). -- **[4b]** Faro, S., & Külekci, M. O. (2013, September). - _Towards a Very Fast Multiple String Matching Algorithm for Short Patterns_. - In Stringology (pp. 
78-91). - [PDF](https://www.dmi.unict.it/faro/papers/conference/faro36.pdf). -- **[4c]** Faro, S., & Külekci, M. O. (2013, January). - _Fast packed string matching for short patterns_. - In Proceedings of the Meeting on Algorithm Engineering & Expermiments - (pp. 113-121). - Society for Industrial and Applied Mathematics. - [PDF](https://arxiv.org/pdf/1209.6449.pdf). -- **[4d]** Faro, S., & Külekci, M. O. (2014). - _Fast and flexible packed string matching_. - Journal of Discrete Algorithms, 28, 61-72. - DOI: 10.1016/j.jda.2014.07.003. - -[1_u]: https://github.com/intel/hyperscan -[5_u]: https://software.intel.com/sites/landingpage/IntrinsicsGuide diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/teddy/compile.rs clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/teddy/compile.rs --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/teddy/compile.rs 2023-02-13 06:00:43.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/teddy/compile.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,414 +0,0 @@ -// See the README in this directory for an explanation of the Teddy algorithm. - -use std::cmp; -use std::collections::BTreeMap; -use std::fmt; - -use crate::packed::pattern::{PatternID, Patterns}; -use crate::packed::teddy::Teddy; - -/// A builder for constructing a Teddy matcher. -/// -/// The builder primarily permits fine grained configuration of the Teddy -/// matcher. Most options are made only available for testing/benchmarking -/// purposes. In reality, options are automatically determined by the nature -/// and number of patterns given to the builder. -#[derive(Clone, Debug)] -pub struct Builder { - /// When none, this is automatically determined. Otherwise, `false` means - /// slim Teddy is used (8 buckets) and `true` means fat Teddy is used - /// (16 buckets). 
Fat Teddy requires AVX2, so if that CPU feature isn't - /// available and Fat Teddy was requested, no matcher will be built. - fat: Option, - /// When none, this is automatically determined. Otherwise, `false` means - /// that 128-bit vectors will be used (up to SSSE3 instructions) where as - /// `true` means that 256-bit vectors will be used. As with `fat`, if - /// 256-bit vectors are requested and they aren't available, then a - /// searcher will not be built. - avx: Option, -} - -impl Default for Builder { - fn default() -> Builder { - Builder::new() - } -} - -impl Builder { - /// Create a new builder for configuring a Teddy matcher. - pub fn new() -> Builder { - Builder { fat: None, avx: None } - } - - /// Build a matcher for the set of patterns given. If a matcher could not - /// be built, then `None` is returned. - /// - /// Generally, a matcher isn't built if the necessary CPU features aren't - /// available, an unsupported target or if the searcher is believed to be - /// slower than standard techniques (i.e., if there are too many literals). - pub fn build(&self, patterns: &Patterns) -> Option { - self.build_imp(patterns) - } - - /// Require the use of Fat (true) or Slim (false) Teddy. Fat Teddy uses - /// 16 buckets where as Slim Teddy uses 8 buckets. More buckets are useful - /// for a larger set of literals. - /// - /// `None` is the default, which results in an automatic selection based - /// on the number of literals and available CPU features. - pub fn fat(&mut self, yes: Option) -> &mut Builder { - self.fat = yes; - self - } - - /// Request the use of 256-bit vectors (true) or 128-bit vectors (false). - /// Generally, a larger vector size is better since it either permits - /// matching more patterns or matching more bytes in the haystack at once. - /// - /// `None` is the default, which results in an automatic selection based on - /// the number of literals and available CPU features. 
- pub fn avx(&mut self, yes: Option) -> &mut Builder { - self.avx = yes; - self - } - - fn build_imp(&self, patterns: &Patterns) -> Option { - use crate::packed::teddy::runtime; - - // Most of the logic here is just about selecting the optimal settings, - // or perhaps even rejecting construction altogether. The choices - // we have are: fat (avx only) or not, ssse3 or avx2, and how many - // patterns we allow ourselves to search. Additionally, for testing - // and benchmarking, we permit callers to try to "force" a setting, - // and if the setting isn't allowed (e.g., forcing AVX when AVX isn't - // available), then we bail and return nothing. - - if patterns.len() > 64 { - return None; - } - let has_ssse3 = is_x86_feature_detected!("ssse3"); - let has_avx = is_x86_feature_detected!("avx2"); - let avx = if self.avx == Some(true) { - if !has_avx { - return None; - } - true - } else if self.avx == Some(false) { - if !has_ssse3 { - return None; - } - false - } else if !has_ssse3 && !has_avx { - return None; - } else { - has_avx - }; - let fat = match self.fat { - None => avx && patterns.len() > 32, - Some(false) => false, - Some(true) if !avx => return None, - Some(true) => true, - }; - - let mut compiler = Compiler::new(patterns, fat); - compiler.compile(); - let Compiler { buckets, masks, .. } = compiler; - // SAFETY: It is required that the builder only produce Teddy matchers - // that are allowed to run on the current CPU, since we later assume - // that the presence of (for example) TeddySlim1Mask256 means it is - // safe to call functions marked with the `avx2` target feature. 
- match (masks.len(), avx, fat) { - (1, false, _) => Some(Teddy { - buckets, - max_pattern_id: patterns.max_pattern_id(), - exec: runtime::Exec::TeddySlim1Mask128( - runtime::TeddySlim1Mask128 { - mask1: runtime::Mask128::new(masks[0]), - }, - ), - }), - (1, true, false) => Some(Teddy { - buckets, - max_pattern_id: patterns.max_pattern_id(), - exec: runtime::Exec::TeddySlim1Mask256( - runtime::TeddySlim1Mask256 { - mask1: runtime::Mask256::new(masks[0]), - }, - ), - }), - (1, true, true) => Some(Teddy { - buckets, - max_pattern_id: patterns.max_pattern_id(), - exec: runtime::Exec::TeddyFat1Mask256( - runtime::TeddyFat1Mask256 { - mask1: runtime::Mask256::new(masks[0]), - }, - ), - }), - (2, false, _) => Some(Teddy { - buckets, - max_pattern_id: patterns.max_pattern_id(), - exec: runtime::Exec::TeddySlim2Mask128( - runtime::TeddySlim2Mask128 { - mask1: runtime::Mask128::new(masks[0]), - mask2: runtime::Mask128::new(masks[1]), - }, - ), - }), - (2, true, false) => Some(Teddy { - buckets, - max_pattern_id: patterns.max_pattern_id(), - exec: runtime::Exec::TeddySlim2Mask256( - runtime::TeddySlim2Mask256 { - mask1: runtime::Mask256::new(masks[0]), - mask2: runtime::Mask256::new(masks[1]), - }, - ), - }), - (2, true, true) => Some(Teddy { - buckets, - max_pattern_id: patterns.max_pattern_id(), - exec: runtime::Exec::TeddyFat2Mask256( - runtime::TeddyFat2Mask256 { - mask1: runtime::Mask256::new(masks[0]), - mask2: runtime::Mask256::new(masks[1]), - }, - ), - }), - (3, false, _) => Some(Teddy { - buckets, - max_pattern_id: patterns.max_pattern_id(), - exec: runtime::Exec::TeddySlim3Mask128( - runtime::TeddySlim3Mask128 { - mask1: runtime::Mask128::new(masks[0]), - mask2: runtime::Mask128::new(masks[1]), - mask3: runtime::Mask128::new(masks[2]), - }, - ), - }), - (3, true, false) => Some(Teddy { - buckets, - max_pattern_id: patterns.max_pattern_id(), - exec: runtime::Exec::TeddySlim3Mask256( - runtime::TeddySlim3Mask256 { - mask1: runtime::Mask256::new(masks[0]), - mask2: 
runtime::Mask256::new(masks[1]), - mask3: runtime::Mask256::new(masks[2]), - }, - ), - }), - (3, true, true) => Some(Teddy { - buckets, - max_pattern_id: patterns.max_pattern_id(), - exec: runtime::Exec::TeddyFat3Mask256( - runtime::TeddyFat3Mask256 { - mask1: runtime::Mask256::new(masks[0]), - mask2: runtime::Mask256::new(masks[1]), - mask3: runtime::Mask256::new(masks[2]), - }, - ), - }), - _ => unreachable!(), - } - } -} - -/// A compiler is in charge of allocating patterns into buckets and generating -/// the masks necessary for searching. -#[derive(Clone)] -struct Compiler<'p> { - patterns: &'p Patterns, - buckets: Vec>, - masks: Vec, -} - -impl<'p> Compiler<'p> { - /// Create a new Teddy compiler for the given patterns. If `fat` is true, - /// then 16 buckets will be used instead of 8. - /// - /// This panics if any of the patterns given are empty. - fn new(patterns: &'p Patterns, fat: bool) -> Compiler<'p> { - let mask_len = cmp::min(3, patterns.minimum_len()); - assert!(1 <= mask_len && mask_len <= 3); - - Compiler { - patterns, - buckets: vec![vec![]; if fat { 16 } else { 8 }], - masks: vec![Mask::default(); mask_len], - } - } - - /// Compile the patterns in this compiler into buckets and masks. - fn compile(&mut self) { - let mut lonibble_to_bucket: BTreeMap, usize> = BTreeMap::new(); - for (id, pattern) in self.patterns.iter() { - // We try to be slightly clever in how we assign patterns into - // buckets. Generally speaking, we want patterns with the same - // prefix to be in the same bucket, since it minimizes the amount - // of time we spend churning through buckets in the verification - // step. - // - // So we could assign patterns with the same N-prefix (where N - // is the size of the mask, which is one of {1, 2, 3}) to the - // same bucket. However, case insensitive searches are fairly - // common, so we'd for example, ideally want to treat `abc` and - // `ABC` as if they shared the same prefix. 
ASCII has the nice - // property that the lower 4 bits of A and a are the same, so we - // therefore group patterns with the same low-nybbe-N-prefix into - // the same bucket. - // - // MOREOVER, this is actually necessary for correctness! In - // particular, by grouping patterns with the same prefix into the - // same bucket, we ensure that we preserve correct leftmost-first - // and leftmost-longest match semantics. In addition to the fact - // that `patterns.iter()` iterates in the correct order, this - // guarantees that all possible ambiguous matches will occur in - // the same bucket. The verification routine could be adjusted to - // support correct leftmost match semantics regardless of bucket - // allocation, but that results in a performance hit. It's much - // nicer to be able to just stop as soon as a match is found. - let lonybs = pattern.low_nybbles(self.masks.len()); - if let Some(&bucket) = lonibble_to_bucket.get(&lonybs) { - self.buckets[bucket].push(id); - } else { - // N.B. We assign buckets in reverse because it shouldn't have - // any influence on performance, but it does make it harder to - // get leftmost match semantics accidentally correct. 
- let bucket = (self.buckets.len() - 1) - - (id as usize % self.buckets.len()); - self.buckets[bucket].push(id); - lonibble_to_bucket.insert(lonybs, bucket); - } - } - for (bucket_index, bucket) in self.buckets.iter().enumerate() { - for &pat_id in bucket { - let pat = self.patterns.get(pat_id); - for (i, mask) in self.masks.iter_mut().enumerate() { - if self.buckets.len() == 8 { - mask.add_slim(bucket_index as u8, pat.bytes()[i]); - } else { - mask.add_fat(bucket_index as u8, pat.bytes()[i]); - } - } - } - } - } -} - -impl<'p> fmt::Debug for Compiler<'p> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let mut buckets = vec![vec![]; self.buckets.len()]; - for (i, bucket) in self.buckets.iter().enumerate() { - for &patid in bucket { - buckets[i].push(self.patterns.get(patid)); - } - } - f.debug_struct("Compiler") - .field("buckets", &buckets) - .field("masks", &self.masks) - .finish() - } -} - -/// Mask represents the low and high nybble masks that will be used during -/// search. Each mask is 32 bytes wide, although only the first 16 bytes are -/// used for the SSSE3 runtime. -/// -/// Each byte in the mask corresponds to a 8-bit bitset, where bit `i` is set -/// if and only if the corresponding nybble is in the ith bucket. The index of -/// the byte (0-15, inclusive) corresponds to the nybble. -/// -/// Each mask is used as the target of a shuffle, where the indices for the -/// shuffle are taken from the haystack. AND'ing the shuffles for both the -/// low and high masks together also results in 8-bit bitsets, but where bit -/// `i` is set if and only if the correspond *byte* is in the ith bucket. -/// -/// During compilation, masks are just arrays. But during search, these masks -/// are represented as 128-bit or 256-bit vectors. -/// -/// (See the README is this directory for more details.) 
-#[derive(Clone, Copy, Default)] -pub struct Mask { - lo: [u8; 32], - hi: [u8; 32], -} - -impl Mask { - /// Update this mask by adding the given byte to the given bucket. The - /// given bucket must be in the range 0-7. - /// - /// This is for "slim" Teddy, where there are only 8 buckets. - fn add_slim(&mut self, bucket: u8, byte: u8) { - assert!(bucket < 8); - - let byte_lo = (byte & 0xF) as usize; - let byte_hi = ((byte >> 4) & 0xF) as usize; - // When using 256-bit vectors, we need to set this bucket assignment in - // the low and high 128-bit portions of the mask. This allows us to - // process 32 bytes at a time. Namely, AVX2 shuffles operate on each - // of the 128-bit lanes, rather than the full 256-bit vector at once. - self.lo[byte_lo] |= 1 << bucket; - self.lo[byte_lo + 16] |= 1 << bucket; - self.hi[byte_hi] |= 1 << bucket; - self.hi[byte_hi + 16] |= 1 << bucket; - } - - /// Update this mask by adding the given byte to the given bucket. The - /// given bucket must be in the range 0-15. - /// - /// This is for "fat" Teddy, where there are 16 buckets. - fn add_fat(&mut self, bucket: u8, byte: u8) { - assert!(bucket < 16); - - let byte_lo = (byte & 0xF) as usize; - let byte_hi = ((byte >> 4) & 0xF) as usize; - // Unlike slim teddy, fat teddy only works with AVX2. For fat teddy, - // the high 128 bits of our mask correspond to buckets 8-15, while the - // low 128 bits correspond to buckets 0-7. - if bucket < 8 { - self.lo[byte_lo] |= 1 << bucket; - self.hi[byte_hi] |= 1 << bucket; - } else { - self.lo[byte_lo + 16] |= 1 << (bucket % 8); - self.hi[byte_hi + 16] |= 1 << (bucket % 8); - } - } - - /// Return the low 128 bits of the low-nybble mask. - pub fn lo128(&self) -> [u8; 16] { - let mut tmp = [0; 16]; - tmp.copy_from_slice(&self.lo[..16]); - tmp - } - - /// Return the full low-nybble mask. - pub fn lo256(&self) -> [u8; 32] { - self.lo - } - - /// Return the low 128 bits of the high-nybble mask. 
- pub fn hi128(&self) -> [u8; 16] { - let mut tmp = [0; 16]; - tmp.copy_from_slice(&self.hi[..16]); - tmp - } - - /// Return the full high-nybble mask. - pub fn hi256(&self) -> [u8; 32] { - self.hi - } -} - -impl fmt::Debug for Mask { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let (mut parts_lo, mut parts_hi) = (vec![], vec![]); - for i in 0..32 { - parts_lo.push(format!("{:02}: {:08b}", i, self.lo[i])); - parts_hi.push(format!("{:02}: {:08b}", i, self.hi[i])); - } - f.debug_struct("Mask") - .field("lo", &parts_lo) - .field("hi", &parts_hi) - .finish() - } -} diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/teddy/mod.rs clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/teddy/mod.rs --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/teddy/mod.rs 2023-02-13 06:00:43.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/teddy/mod.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,62 +0,0 @@ -#[cfg(target_arch = "x86_64")] -pub use crate::packed::teddy::compile::Builder; -#[cfg(not(target_arch = "x86_64"))] -pub use crate::packed::teddy::fallback::Builder; -#[cfg(not(target_arch = "x86_64"))] -pub use crate::packed::teddy::fallback::Teddy; -#[cfg(target_arch = "x86_64")] -pub use crate::packed::teddy::runtime::Teddy; - -#[cfg(target_arch = "x86_64")] -mod compile; -#[cfg(target_arch = "x86_64")] -mod runtime; - -#[cfg(not(target_arch = "x86_64"))] -mod fallback { - use crate::packed::pattern::Patterns; - use crate::Match; - - #[derive(Clone, Debug, Default)] - pub struct Builder(()); - - impl Builder { - pub fn new() -> Builder { - Builder(()) - } - - pub fn build(&self, _: &Patterns) -> Option { - None - } - - pub fn fat(&mut self, _: Option) -> &mut Builder { - self - } - - pub fn avx(&mut self, _: Option) -> &mut Builder { - self - } - } - - #[derive(Clone, Debug)] - pub struct Teddy(()); - - impl Teddy { - pub fn find_at( - &self, - _: 
&Patterns, - _: &[u8], - _: usize, - ) -> Option { - None - } - - pub fn minimum_len(&self) -> usize { - 0 - } - - pub fn heap_bytes(&self) -> usize { - 0 - } - } -} diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/teddy/runtime.rs clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/teddy/runtime.rs --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/teddy/runtime.rs 2023-02-13 06:00:43.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/teddy/runtime.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,1204 +0,0 @@ -// See the README in this directory for an explanation of the Teddy algorithm. -// It is strongly recommended to peruse the README before trying to grok this -// code, as its use of SIMD is pretty opaque, although I tried to add comments -// where appropriate. -// -// Moreover, while there is a lot of code in this file, most of it is -// repeated variants of the same thing. Specifically, there are three Teddy -// variants: Slim 128-bit Teddy (8 buckets), Slim 256-bit Teddy (8 buckets) -// and Fat 256-bit Teddy (16 buckets). For each variant, there are three -// implementations, corresponding to mask lengths of 1, 2 and 3. Bringing it to -// a total of nine variants. Each one is structured roughly the same: -// -// while at <= len(haystack) - CHUNK_SIZE: -// let candidate = find_candidate_in_chunk(haystack, at) -// if not all zeroes(candidate): -// if match = verify(haystack, at, candidate): -// return match -// -// For the most part, this remains unchanged. The parts that vary are the -// verification routine (for slim vs fat Teddy) and the candidate extraction -// (based on the number of masks). -// -// In the code below, a "candidate" corresponds to a single vector with 8-bit -// lanes. 
Each lane is itself an 8-bit bitset, where the ith bit is set in the -// jth lane if and only if the byte occurring at position `j` is in the -// bucket `i` (where the `j`th position is the position in the current window -// of the haystack, which is always 16 or 32 bytes). Note to be careful here: -// the ith bit and the jth lane correspond to the least significant bits of the -// vector. So when visualizing how the current window of bytes is stored in a -// vector, you often need to flip it around. For example, the text `abcd` in a -// 4-byte vector would look like this: -// -// 01100100 01100011 01100010 01100001 -// d c b a -// -// When the mask length is 1, then finding the candidate is pretty straight -// forward: you just apply the shuffle indices (from the haystack window) to -// the masks, and then AND them together, as described in the README. But for -// masks of length 2 and 3, you need to keep a little state. Specifically, -// you need to store the final 1 (for mask length 2) or 2 (for mask length 3) -// bytes of the candidate for use when searching the next window. This is for -// handling matches that span two windows. -// -// With respect to the repeated code, it would likely be possible to reduce -// the number of copies of code below using polymorphism, but I find this -// formulation clearer instead of needing to reason through generics. However, -// I admit, there may be a simpler generic construction that I'm missing. -// -// All variants are fairly heavily tested in src/packed/tests.rs. - -use std::arch::x86_64::*; -use std::mem; - -use crate::packed::pattern::{PatternID, Patterns}; -use crate::packed::teddy::compile; -use crate::packed::vector::*; -use crate::Match; - -/// The Teddy runtime. -/// -/// A Teddy runtime can be used to quickly search for occurrences of one or -/// more patterns. 
While it does not scale to an arbitrary number of patterns -/// like Aho-Corasick, it does find occurrences for a small set of patterns -/// much more quickly than Aho-Corasick. -/// -/// Teddy cannot run on small haystacks below a certain size, which is -/// dependent on the type of matcher used. This size can be queried via the -/// `minimum_len` method. Violating this will result in a panic. -/// -/// Finally, when callers use a Teddy runtime, they must provide precisely the -/// patterns used to construct the Teddy matcher. Violating this will result -/// in either a panic or incorrect results, but will never sacrifice memory -/// safety. -#[derive(Clone, Debug)] -pub struct Teddy { - /// The allocation of patterns in buckets. This only contains the IDs of - /// patterns. In order to do full verification, callers must provide the - /// actual patterns when using Teddy. - pub buckets: Vec>, - /// The maximum identifier of a pattern. This is used as a sanity check to - /// ensure that the patterns provided by the caller are the same as the - /// patterns that were used to compile the matcher. This sanity check - /// permits safely eliminating bounds checks regardless of what patterns - /// are provided by the caller. - /// - /// Note that users of the aho-corasick crate cannot get this wrong. Only - /// code internal to this crate can get it wrong, since neither `Patterns` - /// type nor the Teddy runtime are public API items. - pub max_pattern_id: PatternID, - /// The actual runtime to use. - pub exec: Exec, -} - -impl Teddy { - /// Return the first occurrence of a match in the given haystack after or - /// starting at `at`. - /// - /// The patterns provided must be precisely the same patterns given to the - /// Teddy builder, otherwise this may panic or produce incorrect results. - /// - /// All matches are consistent with the match semantics (leftmost-first or - /// leftmost-longest) set on `pats`. 
- pub fn find_at( - &self, - pats: &Patterns, - haystack: &[u8], - at: usize, - ) -> Option { - // This assert is a bit subtle, but it's an important guarantee. - // Namely, if the maximum pattern ID seen by Teddy is the same as the - // one in the patterns given, then we are guaranteed that every pattern - // ID in all Teddy buckets are valid indices into `pats`. While this - // is nominally true, there is no guarantee that callers provide the - // same `pats` to both the Teddy builder and the searcher, which would - // otherwise make `find_at` unsafe to call. But this assert lets us - // keep this routine safe and eliminate an important bounds check in - // verification. - assert_eq!( - self.max_pattern_id, - pats.max_pattern_id(), - "teddy must be called with same patterns it was built with", - ); - // SAFETY: The haystack must have at least a minimum number of bytes - // for Teddy to be able to work. The minimum number varies depending on - // which matcher is used below. If this is violated, then it's possible - // for searching to do out-of-bounds writes. - assert!(haystack[at..].len() >= self.minimum_len()); - // SAFETY: The various Teddy matchers are always safe to call because - // the Teddy builder guarantees that a particular Exec variant is - // built only when it can be run the current CPU. That is, the Teddy - // builder will not produce a Exec::TeddySlim1Mask256 unless AVX2 is - // enabled. That is, our dynamic CPU feature detection is performed - // once in the builder, and we rely on the type system to avoid needing - // to do it again. 
- unsafe { - match self.exec { - Exec::TeddySlim1Mask128(ref e) => { - e.find_at(pats, self, haystack, at) - } - Exec::TeddySlim1Mask256(ref e) => { - e.find_at(pats, self, haystack, at) - } - Exec::TeddyFat1Mask256(ref e) => { - e.find_at(pats, self, haystack, at) - } - Exec::TeddySlim2Mask128(ref e) => { - e.find_at(pats, self, haystack, at) - } - Exec::TeddySlim2Mask256(ref e) => { - e.find_at(pats, self, haystack, at) - } - Exec::TeddyFat2Mask256(ref e) => { - e.find_at(pats, self, haystack, at) - } - Exec::TeddySlim3Mask128(ref e) => { - e.find_at(pats, self, haystack, at) - } - Exec::TeddySlim3Mask256(ref e) => { - e.find_at(pats, self, haystack, at) - } - Exec::TeddyFat3Mask256(ref e) => { - e.find_at(pats, self, haystack, at) - } - } - } - } - - /// Returns the minimum length of a haystack that must be provided by - /// callers to this Teddy searcher. Providing a haystack shorter than this - /// will result in a panic, but will never violate memory safety. - pub fn minimum_len(&self) -> usize { - // SAFETY: These values must be correct in order to ensure safety. - // The Teddy runtime assumes their haystacks have at least these - // lengths. Violating this will sacrifice memory safety. - match self.exec { - Exec::TeddySlim1Mask128(_) => 16, - Exec::TeddySlim1Mask256(_) => 32, - Exec::TeddyFat1Mask256(_) => 16, - Exec::TeddySlim2Mask128(_) => 17, - Exec::TeddySlim2Mask256(_) => 33, - Exec::TeddyFat2Mask256(_) => 17, - Exec::TeddySlim3Mask128(_) => 18, - Exec::TeddySlim3Mask256(_) => 34, - Exec::TeddyFat3Mask256(_) => 34, - } - } - - /// Returns the approximate total amount of heap used by this searcher, in - /// units of bytes. - pub fn heap_bytes(&self) -> usize { - let num_patterns = self.max_pattern_id as usize + 1; - self.buckets.len() * mem::size_of::>() - + num_patterns * mem::size_of::() - } - - /// Runs the verification routine for Slim 128-bit Teddy. 
- /// - /// The candidate given should be a collection of 8-bit bitsets (one bitset - /// per lane), where the ith bit is set in the jth lane if and only if the - /// byte occurring at `at + j` in `haystack` is in the bucket `i`. - /// - /// This is not safe to call unless the SSSE3 target feature is enabled. - /// The `target_feature` attribute is not applied since this function is - /// always forcefully inlined. - #[inline(always)] - unsafe fn verify128( - &self, - pats: &Patterns, - haystack: &[u8], - at: usize, - cand: __m128i, - ) -> Option { - debug_assert!(!is_all_zeroes128(cand)); - debug_assert_eq!(8, self.buckets.len()); - - // Convert the candidate into 64-bit chunks, and then verify each of - // those chunks. - let parts = unpack64x128(cand); - for (i, &part) in parts.iter().enumerate() { - let pos = at + i * 8; - if let Some(m) = self.verify64(pats, 8, haystack, pos, part) { - return Some(m); - } - } - None - } - - /// Runs the verification routine for Slim 256-bit Teddy. - /// - /// The candidate given should be a collection of 8-bit bitsets (one bitset - /// per lane), where the ith bit is set in the jth lane if and only if the - /// byte occurring at `at + j` in `haystack` is in the bucket `i`. - /// - /// This is not safe to call unless the AVX2 target feature is enabled. - /// The `target_feature` attribute is not applied since this function is - /// always forcefully inlined. - #[inline(always)] - unsafe fn verify256( - &self, - pats: &Patterns, - haystack: &[u8], - at: usize, - cand: __m256i, - ) -> Option { - debug_assert!(!is_all_zeroes256(cand)); - debug_assert_eq!(8, self.buckets.len()); - - // Convert the candidate into 64-bit chunks, and then verify each of - // those chunks. - let parts = unpack64x256(cand); - for (i, &part) in parts.iter().enumerate() { - let pos = at + i * 8; - if let Some(m) = self.verify64(pats, 8, haystack, pos, part) { - return Some(m); - } - } - None - } - - /// Runs the verification routine for Fat 256-bit Teddy. 
- /// - /// The candidate given should be a collection of 8-bit bitsets (one bitset - /// per lane), where the ith bit is set in the jth lane if and only if the - /// byte occurring at `at + (j < 16 ? j : j - 16)` in `haystack` is in the - /// bucket `j < 16 ? i : i + 8`. - /// - /// This is not safe to call unless the AVX2 target feature is enabled. - /// The `target_feature` attribute is not applied since this function is - /// always forcefully inlined. - #[inline(always)] - unsafe fn verify_fat256( - &self, - pats: &Patterns, - haystack: &[u8], - at: usize, - cand: __m256i, - ) -> Option { - debug_assert!(!is_all_zeroes256(cand)); - debug_assert_eq!(16, self.buckets.len()); - - // This is a bit tricky, but we basically want to convert our - // candidate, which looks like this - // - // a31 a30 ... a17 a16 a15 a14 ... a01 a00 - // - // where each a(i) is an 8-bit bitset corresponding to the activated - // buckets, to this - // - // a31 a15 a30 a14 a29 a13 ... a18 a02 a17 a01 a16 a00 - // - // Namely, for Fat Teddy, the high 128-bits of the candidate correspond - // to the same bytes in the haystack in the low 128-bits (so we only - // scan 16 bytes at a time), but are for buckets 8-15 instead of 0-7. - // - // The verification routine wants to look at all potentially matching - // buckets before moving on to the next lane. So for example, both - // a16 and a00 both correspond to the first byte in our window; a00 - // contains buckets 0-7 and a16 contains buckets 8-15. Specifically, - // a16 should be checked before a01. So the transformation shown above - // allows us to use our normal verification procedure with one small - // change: we treat each bitset as 16 bits instead of 8 bits. - - // Swap the 128-bit lanes in the candidate vector. - let swap = _mm256_permute4x64_epi64(cand, 0x4E); - // Interleave the bytes from the low 128-bit lanes, starting with - // cand first. 
- let r1 = _mm256_unpacklo_epi8(cand, swap); - // Interleave the bytes from the high 128-bit lanes, starting with - // cand first. - let r2 = _mm256_unpackhi_epi8(cand, swap); - // Now just take the 2 low 64-bit integers from both r1 and r2. We - // can drop the high 64-bit integers because they are a mirror image - // of the low 64-bit integers. All we care about are the low 128-bit - // lanes of r1 and r2. Combined, they contain all our 16-bit bitsets - // laid out in the desired order, as described above. - let parts = unpacklo64x256(r1, r2); - for (i, &part) in parts.iter().enumerate() { - let pos = at + i * 4; - if let Some(m) = self.verify64(pats, 16, haystack, pos, part) { - return Some(m); - } - } - None - } - - /// Verify whether there are any matches starting at or after `at` in the - /// given `haystack`. The candidate given should correspond to either 8-bit - /// (for 8 buckets) or 16-bit (16 buckets) bitsets. - #[inline(always)] - fn verify64( - &self, - pats: &Patterns, - bucket_count: usize, - haystack: &[u8], - at: usize, - mut cand: u64, - ) -> Option { - // N.B. While the bucket count is known from self.buckets.len(), - // requiring it as a parameter makes it easier for the optimizer to - // know its value, and thus produce more efficient codegen. - debug_assert!(bucket_count == 8 || bucket_count == 16); - while cand != 0 { - let bit = cand.trailing_zeros() as usize; - cand &= !(1 << bit); - - let at = at + (bit / bucket_count); - let bucket = bit % bucket_count; - if let Some(m) = self.verify_bucket(pats, haystack, bucket, at) { - return Some(m); - } - } - None - } - - /// Verify whether there are any matches starting at `at` in the given - /// `haystack` corresponding only to patterns in the given bucket. - #[inline(always)] - fn verify_bucket( - &self, - pats: &Patterns, - haystack: &[u8], - bucket: usize, - at: usize, - ) -> Option { - // Forcing this function to not inline and be "cold" seems to help - // the codegen for Teddy overall. 
Interestingly, this is good for a - // 16% boost in the sherlock/packed/teddy/name/alt1 benchmark (among - // others). Overall, this seems like a problem with codegen, since - // creating the Match itself is a very small amount of code. - #[cold] - #[inline(never)] - fn match_from_span( - pati: PatternID, - start: usize, - end: usize, - ) -> Match { - Match::from_span(pati as usize, start, end) - } - - // N.B. The bounds check for this bucket lookup *should* be elided - // since we assert the number of buckets in each `find_at` routine, - // and the compiler can prove that the `% 8` (or `% 16`) in callers - // of this routine will always be in bounds. - for &pati in &self.buckets[bucket] { - // SAFETY: This is safe because we are guaranteed that every - // index in a Teddy bucket is a valid index into `pats`. This - // guarantee is upheld by the assert checking `max_pattern_id` in - // the beginning of `find_at` above. - // - // This explicit bounds check elision is (amazingly) good for a - // 25-50% boost in some benchmarks, particularly ones with a lot - // of short literals. - let pat = unsafe { pats.get_unchecked(pati) }; - if pat.is_prefix(&haystack[at..]) { - return Some(match_from_span(pati, at, at + pat.len())); - } - } - None - } -} - -/// Exec represents the different search strategies supported by the Teddy -/// runtime. -/// -/// This enum is an important safety abstraction. Namely, callers should only -/// construct a variant in this enum if it is safe to execute its corresponding -/// target features on the current CPU. The 128-bit searchers require SSSE3, -/// while the 256-bit searchers require AVX2. 
-#[derive(Clone, Debug)] -pub enum Exec { - TeddySlim1Mask128(TeddySlim1Mask128), - TeddySlim1Mask256(TeddySlim1Mask256), - TeddyFat1Mask256(TeddyFat1Mask256), - TeddySlim2Mask128(TeddySlim2Mask128), - TeddySlim2Mask256(TeddySlim2Mask256), - TeddyFat2Mask256(TeddyFat2Mask256), - TeddySlim3Mask128(TeddySlim3Mask128), - TeddySlim3Mask256(TeddySlim3Mask256), - TeddyFat3Mask256(TeddyFat3Mask256), -} - -// Most of the code below remains undocumented because they are effectively -// repeated versions of themselves. The general structure is described in the -// README and in the comments above. - -#[derive(Clone, Debug)] -pub struct TeddySlim1Mask128 { - pub mask1: Mask128, -} - -impl TeddySlim1Mask128 { - #[target_feature(enable = "ssse3")] - unsafe fn find_at( - &self, - pats: &Patterns, - teddy: &Teddy, - haystack: &[u8], - mut at: usize, - ) -> Option { - debug_assert!(haystack[at..].len() >= teddy.minimum_len()); - // This assert helps eliminate bounds checks for bucket lookups in - // Teddy::verify_bucket, which has a small (3-4%) performance boost. 
- assert_eq!(8, teddy.buckets.len()); - - let len = haystack.len(); - while at <= len - 16 { - let c = self.candidate(haystack, at); - if !is_all_zeroes128(c) { - if let Some(m) = teddy.verify128(pats, haystack, at, c) { - return Some(m); - } - } - at += 16; - } - if at < len { - at = len - 16; - let c = self.candidate(haystack, at); - if !is_all_zeroes128(c) { - if let Some(m) = teddy.verify128(pats, haystack, at, c) { - return Some(m); - } - } - } - None - } - - #[inline(always)] - unsafe fn candidate(&self, haystack: &[u8], at: usize) -> __m128i { - debug_assert!(haystack[at..].len() >= 16); - - let chunk = loadu128(haystack, at); - members1m128(chunk, self.mask1) - } -} - -#[derive(Clone, Debug)] -pub struct TeddySlim1Mask256 { - pub mask1: Mask256, -} - -impl TeddySlim1Mask256 { - #[target_feature(enable = "avx2")] - unsafe fn find_at( - &self, - pats: &Patterns, - teddy: &Teddy, - haystack: &[u8], - mut at: usize, - ) -> Option { - debug_assert!(haystack[at..].len() >= teddy.minimum_len()); - // This assert helps eliminate bounds checks for bucket lookups in - // Teddy::verify_bucket, which has a small (3-4%) performance boost. 
- assert_eq!(8, teddy.buckets.len()); - - let len = haystack.len(); - while at <= len - 32 { - let c = self.candidate(haystack, at); - if !is_all_zeroes256(c) { - if let Some(m) = teddy.verify256(pats, haystack, at, c) { - return Some(m); - } - } - at += 32; - } - if at < len { - at = len - 32; - let c = self.candidate(haystack, at); - if !is_all_zeroes256(c) { - if let Some(m) = teddy.verify256(pats, haystack, at, c) { - return Some(m); - } - } - } - None - } - - #[inline(always)] - unsafe fn candidate(&self, haystack: &[u8], at: usize) -> __m256i { - debug_assert!(haystack[at..].len() >= 32); - - let chunk = loadu256(haystack, at); - members1m256(chunk, self.mask1) - } -} - -#[derive(Clone, Debug)] -pub struct TeddyFat1Mask256 { - pub mask1: Mask256, -} - -impl TeddyFat1Mask256 { - #[target_feature(enable = "avx2")] - unsafe fn find_at( - &self, - pats: &Patterns, - teddy: &Teddy, - haystack: &[u8], - mut at: usize, - ) -> Option { - debug_assert!(haystack[at..].len() >= teddy.minimum_len()); - // This assert helps eliminate bounds checks for bucket lookups in - // Teddy::verify_bucket, which has a small (3-4%) performance boost. 
- assert_eq!(16, teddy.buckets.len()); - - let len = haystack.len(); - while at <= len - 16 { - let c = self.candidate(haystack, at); - if !is_all_zeroes256(c) { - if let Some(m) = teddy.verify_fat256(pats, haystack, at, c) { - return Some(m); - } - } - at += 16; - } - if at < len { - at = len - 16; - let c = self.candidate(haystack, at); - if !is_all_zeroes256(c) { - if let Some(m) = teddy.verify_fat256(pats, haystack, at, c) { - return Some(m); - } - } - } - None - } - - #[inline(always)] - unsafe fn candidate(&self, haystack: &[u8], at: usize) -> __m256i { - debug_assert!(haystack[at..].len() >= 16); - - let chunk = _mm256_broadcastsi128_si256(loadu128(haystack, at)); - members1m256(chunk, self.mask1) - } -} - -#[derive(Clone, Debug)] -pub struct TeddySlim2Mask128 { - pub mask1: Mask128, - pub mask2: Mask128, -} - -impl TeddySlim2Mask128 { - #[target_feature(enable = "ssse3")] - unsafe fn find_at( - &self, - pats: &Patterns, - teddy: &Teddy, - haystack: &[u8], - mut at: usize, - ) -> Option { - debug_assert!(haystack[at..].len() >= teddy.minimum_len()); - // This assert helps eliminate bounds checks for bucket lookups in - // Teddy::verify_bucket, which has a small (3-4%) performance boost. 
- assert_eq!(8, teddy.buckets.len()); - - at += 1; - let len = haystack.len(); - let mut prev0 = ones128(); - while at <= len - 16 { - let c = self.candidate(haystack, at, &mut prev0); - if !is_all_zeroes128(c) { - if let Some(m) = teddy.verify128(pats, haystack, at - 1, c) { - return Some(m); - } - } - at += 16; - } - if at < len { - at = len - 16; - prev0 = ones128(); - - let c = self.candidate(haystack, at, &mut prev0); - if !is_all_zeroes128(c) { - if let Some(m) = teddy.verify128(pats, haystack, at - 1, c) { - return Some(m); - } - } - } - None - } - - #[inline(always)] - unsafe fn candidate( - &self, - haystack: &[u8], - at: usize, - prev0: &mut __m128i, - ) -> __m128i { - debug_assert!(haystack[at..].len() >= 16); - - let chunk = loadu128(haystack, at); - let (res0, res1) = members2m128(chunk, self.mask1, self.mask2); - let res0prev0 = _mm_alignr_epi8(res0, *prev0, 15); - _mm_and_si128(res0prev0, res1) - } -} - -#[derive(Clone, Debug)] -pub struct TeddySlim2Mask256 { - pub mask1: Mask256, - pub mask2: Mask256, -} - -impl TeddySlim2Mask256 { - #[target_feature(enable = "avx2")] - unsafe fn find_at( - &self, - pats: &Patterns, - teddy: &Teddy, - haystack: &[u8], - mut at: usize, - ) -> Option { - debug_assert!(haystack[at..].len() >= teddy.minimum_len()); - // This assert helps eliminate bounds checks for bucket lookups in - // Teddy::verify_bucket, which has a small (3-4%) performance boost. 
- assert_eq!(8, teddy.buckets.len()); - - at += 1; - let len = haystack.len(); - let mut prev0 = ones256(); - while at <= len - 32 { - let c = self.candidate(haystack, at, &mut prev0); - if !is_all_zeroes256(c) { - if let Some(m) = teddy.verify256(pats, haystack, at - 1, c) { - return Some(m); - } - } - at += 32; - } - if at < len { - at = len - 32; - prev0 = ones256(); - - let c = self.candidate(haystack, at, &mut prev0); - if !is_all_zeroes256(c) { - if let Some(m) = teddy.verify256(pats, haystack, at - 1, c) { - return Some(m); - } - } - } - None - } - - #[inline(always)] - unsafe fn candidate( - &self, - haystack: &[u8], - at: usize, - prev0: &mut __m256i, - ) -> __m256i { - debug_assert!(haystack[at..].len() >= 32); - - let chunk = loadu256(haystack, at); - let (res0, res1) = members2m256(chunk, self.mask1, self.mask2); - let res0prev0 = alignr256_15(res0, *prev0); - let res = _mm256_and_si256(res0prev0, res1); - *prev0 = res0; - res - } -} - -#[derive(Clone, Debug)] -pub struct TeddyFat2Mask256 { - pub mask1: Mask256, - pub mask2: Mask256, -} - -impl TeddyFat2Mask256 { - #[target_feature(enable = "avx2")] - unsafe fn find_at( - &self, - pats: &Patterns, - teddy: &Teddy, - haystack: &[u8], - mut at: usize, - ) -> Option { - debug_assert!(haystack[at..].len() >= teddy.minimum_len()); - // This assert helps eliminate bounds checks for bucket lookups in - // Teddy::verify_bucket, which has a small (3-4%) performance boost. 
- assert_eq!(16, teddy.buckets.len()); - - at += 1; - let len = haystack.len(); - let mut prev0 = ones256(); - while at <= len - 16 { - let c = self.candidate(haystack, at, &mut prev0); - if !is_all_zeroes256(c) { - if let Some(m) = teddy.verify_fat256(pats, haystack, at - 1, c) - { - return Some(m); - } - } - at += 16; - } - if at < len { - at = len - 16; - prev0 = ones256(); - - let c = self.candidate(haystack, at, &mut prev0); - if !is_all_zeroes256(c) { - if let Some(m) = teddy.verify_fat256(pats, haystack, at - 1, c) - { - return Some(m); - } - } - } - None - } - - #[inline(always)] - unsafe fn candidate( - &self, - haystack: &[u8], - at: usize, - prev0: &mut __m256i, - ) -> __m256i { - debug_assert!(haystack[at..].len() >= 16); - - let chunk = _mm256_broadcastsi128_si256(loadu128(haystack, at)); - let (res0, res1) = members2m256(chunk, self.mask1, self.mask2); - let res0prev0 = _mm256_alignr_epi8(res0, *prev0, 15); - let res = _mm256_and_si256(res0prev0, res1); - *prev0 = res0; - res - } -} - -#[derive(Clone, Debug)] -pub struct TeddySlim3Mask128 { - pub mask1: Mask128, - pub mask2: Mask128, - pub mask3: Mask128, -} - -impl TeddySlim3Mask128 { - #[target_feature(enable = "ssse3")] - unsafe fn find_at( - &self, - pats: &Patterns, - teddy: &Teddy, - haystack: &[u8], - mut at: usize, - ) -> Option { - debug_assert!(haystack[at..].len() >= teddy.minimum_len()); - // This assert helps eliminate bounds checks for bucket lookups in - // Teddy::verify_bucket, which has a small (3-4%) performance boost. 
- assert_eq!(8, teddy.buckets.len()); - - at += 2; - let len = haystack.len(); - let (mut prev0, mut prev1) = (ones128(), ones128()); - while at <= len - 16 { - let c = self.candidate(haystack, at, &mut prev0, &mut prev1); - if !is_all_zeroes128(c) { - if let Some(m) = teddy.verify128(pats, haystack, at - 2, c) { - return Some(m); - } - } - at += 16; - } - if at < len { - at = len - 16; - prev0 = ones128(); - prev1 = ones128(); - - let c = self.candidate(haystack, at, &mut prev0, &mut prev1); - if !is_all_zeroes128(c) { - if let Some(m) = teddy.verify128(pats, haystack, at - 2, c) { - return Some(m); - } - } - } - None - } - - #[inline(always)] - unsafe fn candidate( - &self, - haystack: &[u8], - at: usize, - prev0: &mut __m128i, - prev1: &mut __m128i, - ) -> __m128i { - debug_assert!(haystack[at..].len() >= 16); - - let chunk = loadu128(haystack, at); - let (res0, res1, res2) = - members3m128(chunk, self.mask1, self.mask2, self.mask3); - let res0prev0 = _mm_alignr_epi8(res0, *prev0, 14); - let res1prev1 = _mm_alignr_epi8(res1, *prev1, 15); - let res = _mm_and_si128(_mm_and_si128(res0prev0, res1prev1), res2); - *prev0 = res0; - *prev1 = res1; - res - } -} - -#[derive(Clone, Debug)] -pub struct TeddySlim3Mask256 { - pub mask1: Mask256, - pub mask2: Mask256, - pub mask3: Mask256, -} - -impl TeddySlim3Mask256 { - #[target_feature(enable = "avx2")] - unsafe fn find_at( - &self, - pats: &Patterns, - teddy: &Teddy, - haystack: &[u8], - mut at: usize, - ) -> Option { - debug_assert!(haystack[at..].len() >= teddy.minimum_len()); - // This assert helps eliminate bounds checks for bucket lookups in - // Teddy::verify_bucket, which has a small (3-4%) performance boost. 
- assert_eq!(8, teddy.buckets.len()); - - at += 2; - let len = haystack.len(); - let (mut prev0, mut prev1) = (ones256(), ones256()); - while at <= len - 32 { - let c = self.candidate(haystack, at, &mut prev0, &mut prev1); - if !is_all_zeroes256(c) { - if let Some(m) = teddy.verify256(pats, haystack, at - 2, c) { - return Some(m); - } - } - at += 32; - } - if at < len { - at = len - 32; - prev0 = ones256(); - prev1 = ones256(); - - let c = self.candidate(haystack, at, &mut prev0, &mut prev1); - if !is_all_zeroes256(c) { - if let Some(m) = teddy.verify256(pats, haystack, at - 2, c) { - return Some(m); - } - } - } - None - } - - #[inline(always)] - unsafe fn candidate( - &self, - haystack: &[u8], - at: usize, - prev0: &mut __m256i, - prev1: &mut __m256i, - ) -> __m256i { - debug_assert!(haystack[at..].len() >= 32); - - let chunk = loadu256(haystack, at); - let (res0, res1, res2) = - members3m256(chunk, self.mask1, self.mask2, self.mask3); - let res0prev0 = alignr256_14(res0, *prev0); - let res1prev1 = alignr256_15(res1, *prev1); - let res = - _mm256_and_si256(_mm256_and_si256(res0prev0, res1prev1), res2); - *prev0 = res0; - *prev1 = res1; - res - } -} - -#[derive(Clone, Debug)] -pub struct TeddyFat3Mask256 { - pub mask1: Mask256, - pub mask2: Mask256, - pub mask3: Mask256, -} - -impl TeddyFat3Mask256 { - #[target_feature(enable = "avx2")] - unsafe fn find_at( - &self, - pats: &Patterns, - teddy: &Teddy, - haystack: &[u8], - mut at: usize, - ) -> Option { - debug_assert!(haystack[at..].len() >= teddy.minimum_len()); - // This assert helps eliminate bounds checks for bucket lookups in - // Teddy::verify_bucket, which has a small (3-4%) performance boost. 
- assert_eq!(16, teddy.buckets.len()); - - at += 2; - let len = haystack.len(); - let (mut prev0, mut prev1) = (ones256(), ones256()); - while at <= len - 16 { - let c = self.candidate(haystack, at, &mut prev0, &mut prev1); - if !is_all_zeroes256(c) { - if let Some(m) = teddy.verify_fat256(pats, haystack, at - 2, c) - { - return Some(m); - } - } - at += 16; - } - if at < len { - at = len - 16; - prev0 = ones256(); - prev1 = ones256(); - - let c = self.candidate(haystack, at, &mut prev0, &mut prev1); - if !is_all_zeroes256(c) { - if let Some(m) = teddy.verify_fat256(pats, haystack, at - 2, c) - { - return Some(m); - } - } - } - None - } - - #[inline(always)] - unsafe fn candidate( - &self, - haystack: &[u8], - at: usize, - prev0: &mut __m256i, - prev1: &mut __m256i, - ) -> __m256i { - debug_assert!(haystack[at..].len() >= 16); - - let chunk = _mm256_broadcastsi128_si256(loadu128(haystack, at)); - let (res0, res1, res2) = - members3m256(chunk, self.mask1, self.mask2, self.mask3); - let res0prev0 = _mm256_alignr_epi8(res0, *prev0, 14); - let res1prev1 = _mm256_alignr_epi8(res1, *prev1, 15); - let res = - _mm256_and_si256(_mm256_and_si256(res0prev0, res1prev1), res2); - *prev0 = res0; - *prev1 = res1; - res - } -} - -/// A 128-bit mask for the low and high nybbles in a set of patterns. Each -/// lane `j` corresponds to a bitset where the `i`th bit is set if and only if -/// the nybble `j` is in the bucket `i` at a particular position. -#[derive(Clone, Copy, Debug)] -pub struct Mask128 { - lo: __m128i, - hi: __m128i, -} - -impl Mask128 { - /// Create a new SIMD mask from the mask produced by the Teddy builder. - pub fn new(mask: compile::Mask) -> Mask128 { - // SAFETY: This is safe since [u8; 16] has the same representation - // as __m128i. - unsafe { - Mask128 { - lo: mem::transmute(mask.lo128()), - hi: mem::transmute(mask.hi128()), - } - } - } -} - -/// A 256-bit mask for the low and high nybbles in a set of patterns. 
Each -/// lane `j` corresponds to a bitset where the `i`th bit is set if and only if -/// the nybble `j` is in the bucket `i` at a particular position. -/// -/// This is slightly tweaked dependending on whether Slim or Fat Teddy is being -/// used. For Slim Teddy, the bitsets in the lower 128-bits are the same as -/// the bitsets in the higher 128-bits, so that we can search 32 bytes at a -/// time. (Remember, the nybbles in the haystack are used as indices into these -/// masks, and 256-bit shuffles only operate on 128-bit lanes.) -/// -/// For Fat Teddy, the bitsets are not repeated, but instead, the high 128 -/// bits correspond to buckets 8-15. So that a bitset `00100010` has buckets -/// 1 and 5 set if it's in the lower 128 bits, but has buckets 9 and 13 set -/// if it's in the higher 128 bits. -#[derive(Clone, Copy, Debug)] -pub struct Mask256 { - lo: __m256i, - hi: __m256i, -} - -impl Mask256 { - /// Create a new SIMD mask from the mask produced by the Teddy builder. - pub fn new(mask: compile::Mask) -> Mask256 { - // SAFETY: This is safe since [u8; 32] has the same representation - // as __m256i. - unsafe { - Mask256 { - lo: mem::transmute(mask.lo256()), - hi: mem::transmute(mask.hi256()), - } - } - } -} - -// The "members" routines below are responsible for taking a chunk of bytes, -// a number of nybble masks and returning the result of using the masks to -// lookup bytes in the chunk. The results of the high and low nybble masks are -// AND'ed together, such that each candidate returned is a vector, with byte -// sized lanes, and where each lane is an 8-bit bitset corresponding to the -// buckets that contain the corresponding byte. -// -// In the case of masks of length greater than 1, callers will need to keep -// the results from the previous haystack's window, and then shift the vectors -// so that they all line up. Then they can be AND'ed together. 
- -/// Return a candidate for Slim 128-bit Teddy, where `chunk` corresponds to a -/// 16-byte window of the haystack (where the least significant byte -/// corresponds to the start of the window), and `mask1` corresponds to a -/// low/high mask for the first byte of all patterns that are being searched. -#[target_feature(enable = "ssse3")] -unsafe fn members1m128(chunk: __m128i, mask1: Mask128) -> __m128i { - let lomask = _mm_set1_epi8(0xF); - let hlo = _mm_and_si128(chunk, lomask); - let hhi = _mm_and_si128(_mm_srli_epi16(chunk, 4), lomask); - _mm_and_si128( - _mm_shuffle_epi8(mask1.lo, hlo), - _mm_shuffle_epi8(mask1.hi, hhi), - ) -} - -/// Return a candidate for Slim 256-bit Teddy, where `chunk` corresponds to a -/// 32-byte window of the haystack (where the least significant byte -/// corresponds to the start of the window), and `mask1` corresponds to a -/// low/high mask for the first byte of all patterns that are being searched. -/// -/// Note that this can also be used for Fat Teddy, where the high 128 bits in -/// `chunk` is the same as the low 128 bits, which corresponds to a 16 byte -/// window in the haystack. -#[target_feature(enable = "avx2")] -unsafe fn members1m256(chunk: __m256i, mask1: Mask256) -> __m256i { - let lomask = _mm256_set1_epi8(0xF); - let hlo = _mm256_and_si256(chunk, lomask); - let hhi = _mm256_and_si256(_mm256_srli_epi16(chunk, 4), lomask); - _mm256_and_si256( - _mm256_shuffle_epi8(mask1.lo, hlo), - _mm256_shuffle_epi8(mask1.hi, hhi), - ) -} - -/// Return candidates for Slim 128-bit Teddy, where `chunk` corresponds -/// to a 16-byte window of the haystack (where the least significant byte -/// corresponds to the start of the window), and the masks correspond to a -/// low/high mask for the first and second bytes of all patterns that are being -/// searched. The vectors returned correspond to candidates for the first and -/// second bytes in the patterns represented by the masks. 
-#[target_feature(enable = "ssse3")] -unsafe fn members2m128( - chunk: __m128i, - mask1: Mask128, - mask2: Mask128, -) -> (__m128i, __m128i) { - let lomask = _mm_set1_epi8(0xF); - let hlo = _mm_and_si128(chunk, lomask); - let hhi = _mm_and_si128(_mm_srli_epi16(chunk, 4), lomask); - let res0 = _mm_and_si128( - _mm_shuffle_epi8(mask1.lo, hlo), - _mm_shuffle_epi8(mask1.hi, hhi), - ); - let res1 = _mm_and_si128( - _mm_shuffle_epi8(mask2.lo, hlo), - _mm_shuffle_epi8(mask2.hi, hhi), - ); - (res0, res1) -} - -/// Return candidates for Slim 256-bit Teddy, where `chunk` corresponds -/// to a 32-byte window of the haystack (where the least significant byte -/// corresponds to the start of the window), and the masks correspond to a -/// low/high mask for the first and second bytes of all patterns that are being -/// searched. The vectors returned correspond to candidates for the first and -/// second bytes in the patterns represented by the masks. -/// -/// Note that this can also be used for Fat Teddy, where the high 128 bits in -/// `chunk` is the same as the low 128 bits, which corresponds to a 16 byte -/// window in the haystack. 
-#[target_feature(enable = "avx2")] -unsafe fn members2m256( - chunk: __m256i, - mask1: Mask256, - mask2: Mask256, -) -> (__m256i, __m256i) { - let lomask = _mm256_set1_epi8(0xF); - let hlo = _mm256_and_si256(chunk, lomask); - let hhi = _mm256_and_si256(_mm256_srli_epi16(chunk, 4), lomask); - let res0 = _mm256_and_si256( - _mm256_shuffle_epi8(mask1.lo, hlo), - _mm256_shuffle_epi8(mask1.hi, hhi), - ); - let res1 = _mm256_and_si256( - _mm256_shuffle_epi8(mask2.lo, hlo), - _mm256_shuffle_epi8(mask2.hi, hhi), - ); - (res0, res1) -} - -/// Return candidates for Slim 128-bit Teddy, where `chunk` corresponds -/// to a 16-byte window of the haystack (where the least significant byte -/// corresponds to the start of the window), and the masks correspond to a -/// low/high mask for the first, second and third bytes of all patterns that -/// are being searched. The vectors returned correspond to candidates for the -/// first, second and third bytes in the patterns represented by the masks. -#[target_feature(enable = "ssse3")] -unsafe fn members3m128( - chunk: __m128i, - mask1: Mask128, - mask2: Mask128, - mask3: Mask128, -) -> (__m128i, __m128i, __m128i) { - let lomask = _mm_set1_epi8(0xF); - let hlo = _mm_and_si128(chunk, lomask); - let hhi = _mm_and_si128(_mm_srli_epi16(chunk, 4), lomask); - let res0 = _mm_and_si128( - _mm_shuffle_epi8(mask1.lo, hlo), - _mm_shuffle_epi8(mask1.hi, hhi), - ); - let res1 = _mm_and_si128( - _mm_shuffle_epi8(mask2.lo, hlo), - _mm_shuffle_epi8(mask2.hi, hhi), - ); - let res2 = _mm_and_si128( - _mm_shuffle_epi8(mask3.lo, hlo), - _mm_shuffle_epi8(mask3.hi, hhi), - ); - (res0, res1, res2) -} - -/// Return candidates for Slim 256-bit Teddy, where `chunk` corresponds -/// to a 32-byte window of the haystack (where the least significant byte -/// corresponds to the start of the window), and the masks correspond to a -/// low/high mask for the first, second and third bytes of all patterns that -/// are being searched. 
The vectors returned correspond to candidates for the -/// first, second and third bytes in the patterns represented by the masks. -/// -/// Note that this can also be used for Fat Teddy, where the high 128 bits in -/// `chunk` is the same as the low 128 bits, which corresponds to a 16 byte -/// window in the haystack. -#[target_feature(enable = "avx2")] -unsafe fn members3m256( - chunk: __m256i, - mask1: Mask256, - mask2: Mask256, - mask3: Mask256, -) -> (__m256i, __m256i, __m256i) { - let lomask = _mm256_set1_epi8(0xF); - let hlo = _mm256_and_si256(chunk, lomask); - let hhi = _mm256_and_si256(_mm256_srli_epi16(chunk, 4), lomask); - let res0 = _mm256_and_si256( - _mm256_shuffle_epi8(mask1.lo, hlo), - _mm256_shuffle_epi8(mask1.hi, hhi), - ); - let res1 = _mm256_and_si256( - _mm256_shuffle_epi8(mask2.lo, hlo), - _mm256_shuffle_epi8(mask2.hi, hhi), - ); - let res2 = _mm256_and_si256( - _mm256_shuffle_epi8(mask3.lo, hlo), - _mm256_shuffle_epi8(mask3.hi, hhi), - ); - (res0, res1, res2) -} diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/tests.rs clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/tests.rs --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/tests.rs 2023-02-13 06:00:43.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/tests.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,568 +0,0 @@ -use std::collections::HashMap; -use std::usize; - -use crate::packed::{Config, MatchKind}; -use crate::Match; - -/// A description of a single test against a multi-pattern searcher. -/// -/// A single test may not necessarily pass on every configuration of a -/// searcher. The tests are categorized and grouped appropriately below. -#[derive(Clone, Debug, Eq, PartialEq)] -struct SearchTest { - /// The name of this test, for debugging. - name: &'static str, - /// The patterns to search for. - patterns: &'static [&'static str], - /// The text to search. 
- haystack: &'static str, - /// Each match is a triple of (pattern_index, start, end), where - /// pattern_index is an index into `patterns` and `start`/`end` are indices - /// into `haystack`. - matches: &'static [(usize, usize, usize)], -} - -struct SearchTestOwned { - offset: usize, - name: String, - patterns: Vec, - haystack: String, - matches: Vec<(usize, usize, usize)>, -} - -impl SearchTest { - fn variations(&self) -> Vec { - let mut tests = vec![]; - for i in 0..=260 { - tests.push(self.offset_prefix(i)); - tests.push(self.offset_suffix(i)); - tests.push(self.offset_both(i)); - } - tests - } - - fn offset_both(&self, off: usize) -> SearchTestOwned { - SearchTestOwned { - offset: off, - name: self.name.to_string(), - patterns: self.patterns.iter().map(|s| s.to_string()).collect(), - haystack: format!( - "{}{}{}", - "Z".repeat(off), - self.haystack, - "Z".repeat(off) - ), - matches: self - .matches - .iter() - .map(|&(id, s, e)| (id, s + off, e + off)) - .collect(), - } - } - - fn offset_prefix(&self, off: usize) -> SearchTestOwned { - SearchTestOwned { - offset: off, - name: self.name.to_string(), - patterns: self.patterns.iter().map(|s| s.to_string()).collect(), - haystack: format!("{}{}", "Z".repeat(off), self.haystack), - matches: self - .matches - .iter() - .map(|&(id, s, e)| (id, s + off, e + off)) - .collect(), - } - } - - fn offset_suffix(&self, off: usize) -> SearchTestOwned { - SearchTestOwned { - offset: off, - name: self.name.to_string(), - patterns: self.patterns.iter().map(|s| s.to_string()).collect(), - haystack: format!("{}{}", self.haystack, "Z".repeat(off)), - matches: self.matches.to_vec(), - } - } - - // fn to_owned(&self) -> SearchTestOwned { - // SearchTestOwned { - // name: self.name.to_string(), - // patterns: self.patterns.iter().map(|s| s.to_string()).collect(), - // haystack: self.haystack.to_string(), - // matches: self.matches.iter().cloned().collect(), - // } - // } -} - -/// Short-hand constructor for SearchTest. 
We use it a lot below. -macro_rules! t { - ($name:ident, $patterns:expr, $haystack:expr, $matches:expr) => { - SearchTest { - name: stringify!($name), - patterns: $patterns, - haystack: $haystack, - matches: $matches, - } - }; -} - -/// A collection of test groups. -type TestCollection = &'static [&'static [SearchTest]]; - -// Define several collections corresponding to the different type of match -// semantics supported. These collections have some overlap, but each -// collection should have some tests that no other collection has. - -/// Tests for leftmost-first match semantics. -const PACKED_LEFTMOST_FIRST: TestCollection = - &[BASICS, LEFTMOST, LEFTMOST_FIRST, REGRESSION, TEDDY]; - -/// Tests for leftmost-longest match semantics. -const PACKED_LEFTMOST_LONGEST: TestCollection = - &[BASICS, LEFTMOST, LEFTMOST_LONGEST, REGRESSION, TEDDY]; - -// Now define the individual tests that make up the collections above. - -/// A collection of tests for the that should always be true regardless of -/// match semantics. That is, all combinations of leftmost-{first, longest} -/// should produce the same answer. 
-const BASICS: &'static [SearchTest] = &[ - t!(basic001, &["a"], "", &[]), - t!(basic010, &["a"], "a", &[(0, 0, 1)]), - t!(basic020, &["a"], "aa", &[(0, 0, 1), (0, 1, 2)]), - t!(basic030, &["a"], "aaa", &[(0, 0, 1), (0, 1, 2), (0, 2, 3)]), - t!(basic040, &["a"], "aba", &[(0, 0, 1), (0, 2, 3)]), - t!(basic050, &["a"], "bba", &[(0, 2, 3)]), - t!(basic060, &["a"], "bbb", &[]), - t!(basic070, &["a"], "bababbbba", &[(0, 1, 2), (0, 3, 4), (0, 8, 9)]), - t!(basic100, &["aa"], "", &[]), - t!(basic110, &["aa"], "aa", &[(0, 0, 2)]), - t!(basic120, &["aa"], "aabbaa", &[(0, 0, 2), (0, 4, 6)]), - t!(basic130, &["aa"], "abbab", &[]), - t!(basic140, &["aa"], "abbabaa", &[(0, 5, 7)]), - t!(basic150, &["aaa"], "aaa", &[(0, 0, 3)]), - t!(basic200, &["abc"], "abc", &[(0, 0, 3)]), - t!(basic210, &["abc"], "zazabzabcz", &[(0, 6, 9)]), - t!(basic220, &["abc"], "zazabczabcz", &[(0, 3, 6), (0, 7, 10)]), - t!(basic300, &["a", "b"], "", &[]), - t!(basic310, &["a", "b"], "z", &[]), - t!(basic320, &["a", "b"], "b", &[(1, 0, 1)]), - t!(basic330, &["a", "b"], "a", &[(0, 0, 1)]), - t!( - basic340, - &["a", "b"], - "abba", - &[(0, 0, 1), (1, 1, 2), (1, 2, 3), (0, 3, 4),] - ), - t!( - basic350, - &["b", "a"], - "abba", - &[(1, 0, 1), (0, 1, 2), (0, 2, 3), (1, 3, 4),] - ), - t!(basic360, &["abc", "bc"], "xbc", &[(1, 1, 3),]), - t!(basic400, &["foo", "bar"], "", &[]), - t!(basic410, &["foo", "bar"], "foobar", &[(0, 0, 3), (1, 3, 6),]), - t!(basic420, &["foo", "bar"], "barfoo", &[(1, 0, 3), (0, 3, 6),]), - t!(basic430, &["foo", "bar"], "foofoo", &[(0, 0, 3), (0, 3, 6),]), - t!(basic440, &["foo", "bar"], "barbar", &[(1, 0, 3), (1, 3, 6),]), - t!(basic450, &["foo", "bar"], "bafofoo", &[(0, 4, 7),]), - t!(basic460, &["bar", "foo"], "bafofoo", &[(1, 4, 7),]), - t!(basic470, &["foo", "bar"], "fobabar", &[(1, 4, 7),]), - t!(basic480, &["bar", "foo"], "fobabar", &[(0, 4, 7),]), - t!(basic700, &["yabcdef", "abcdezghi"], "yabcdefghi", &[(0, 0, 7),]), - t!(basic710, &["yabcdef", "abcdezghi"], "yabcdezghi", 
&[(1, 1, 10),]), - t!( - basic720, - &["yabcdef", "bcdeyabc", "abcdezghi"], - "yabcdezghi", - &[(2, 1, 10),] - ), - t!(basic810, &["abcd", "bcd", "cd"], "abcd", &[(0, 0, 4),]), - t!(basic820, &["bcd", "cd", "abcd"], "abcd", &[(2, 0, 4),]), - t!(basic830, &["abc", "bc"], "zazabcz", &[(0, 3, 6),]), - t!( - basic840, - &["ab", "ba"], - "abababa", - &[(0, 0, 2), (0, 2, 4), (0, 4, 6),] - ), - t!(basic850, &["foo", "foo"], "foobarfoo", &[(0, 0, 3), (0, 6, 9),]), -]; - -/// Tests for leftmost match semantics. These should pass for both -/// leftmost-first and leftmost-longest match kinds. Stated differently, among -/// ambiguous matches, the longest match and the match that appeared first when -/// constructing the automaton should always be the same. -const LEFTMOST: &'static [SearchTest] = &[ - t!(leftmost000, &["ab", "ab"], "abcd", &[(0, 0, 2)]), - t!(leftmost030, &["a", "ab"], "aa", &[(0, 0, 1), (0, 1, 2)]), - t!(leftmost031, &["ab", "a"], "aa", &[(1, 0, 1), (1, 1, 2)]), - t!(leftmost032, &["ab", "a"], "xayabbbz", &[(1, 1, 2), (0, 3, 5)]), - t!(leftmost300, &["abcd", "bce", "b"], "abce", &[(1, 1, 4)]), - t!(leftmost310, &["abcd", "ce", "bc"], "abce", &[(2, 1, 3)]), - t!(leftmost320, &["abcd", "bce", "ce", "b"], "abce", &[(1, 1, 4)]), - t!(leftmost330, &["abcd", "bce", "cz", "bc"], "abcz", &[(3, 1, 3)]), - t!(leftmost340, &["bce", "cz", "bc"], "bcz", &[(2, 0, 2)]), - t!(leftmost350, &["abc", "bd", "ab"], "abd", &[(2, 0, 2)]), - t!( - leftmost360, - &["abcdefghi", "hz", "abcdefgh"], - "abcdefghz", - &[(2, 0, 8),] - ), - t!( - leftmost370, - &["abcdefghi", "cde", "hz", "abcdefgh"], - "abcdefghz", - &[(3, 0, 8),] - ), - t!( - leftmost380, - &["abcdefghi", "hz", "abcdefgh", "a"], - "abcdefghz", - &[(2, 0, 8),] - ), - t!( - leftmost390, - &["b", "abcdefghi", "hz", "abcdefgh"], - "abcdefghz", - &[(3, 0, 8),] - ), - t!( - leftmost400, - &["h", "abcdefghi", "hz", "abcdefgh"], - "abcdefghz", - &[(3, 0, 8),] - ), - t!( - leftmost410, - &["z", "abcdefghi", "hz", "abcdefgh"], - 
"abcdefghz", - &[(3, 0, 8), (0, 8, 9),] - ), -]; - -/// Tests for non-overlapping leftmost-first match semantics. These tests -/// should generally be specific to leftmost-first, which means they should -/// generally fail under leftmost-longest semantics. -const LEFTMOST_FIRST: &'static [SearchTest] = &[ - t!(leftfirst000, &["ab", "abcd"], "abcd", &[(0, 0, 2)]), - t!(leftfirst020, &["abcd", "ab"], "abcd", &[(0, 0, 4)]), - t!(leftfirst030, &["ab", "ab"], "abcd", &[(0, 0, 2)]), - t!(leftfirst040, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (0, 3, 4)]), - t!(leftfirst100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(1, 1, 5)]), - t!(leftfirst110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]), - t!(leftfirst300, &["abcd", "b", "bce"], "abce", &[(1, 1, 2)]), - t!( - leftfirst310, - &["abcd", "b", "bce", "ce"], - "abce", - &[(1, 1, 2), (3, 2, 4),] - ), - t!( - leftfirst320, - &["a", "abcdefghi", "hz", "abcdefgh"], - "abcdefghz", - &[(0, 0, 1), (2, 7, 9),] - ), - t!(leftfirst330, &["a", "abab"], "abab", &[(0, 0, 1), (0, 2, 3)]), - t!( - leftfirst340, - &["abcdef", "x", "x", "x", "x", "x", "x", "abcde"], - "abcdef", - &[(0, 0, 6)] - ), -]; - -/// Tests for non-overlapping leftmost-longest match semantics. These tests -/// should generally be specific to leftmost-longest, which means they should -/// generally fail under leftmost-first semantics. 
-const LEFTMOST_LONGEST: &'static [SearchTest] = &[ - t!(leftlong000, &["ab", "abcd"], "abcd", &[(1, 0, 4)]), - t!(leftlong010, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4),]), - t!(leftlong040, &["a", "ab"], "a", &[(0, 0, 1)]), - t!(leftlong050, &["a", "ab"], "ab", &[(1, 0, 2)]), - t!(leftlong060, &["ab", "a"], "a", &[(1, 0, 1)]), - t!(leftlong070, &["ab", "a"], "ab", &[(0, 0, 2)]), - t!(leftlong100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(2, 1, 6)]), - t!(leftlong110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]), - t!(leftlong300, &["abcd", "b", "bce"], "abce", &[(2, 1, 4)]), - t!( - leftlong310, - &["a", "abcdefghi", "hz", "abcdefgh"], - "abcdefghz", - &[(3, 0, 8),] - ), - t!(leftlong320, &["a", "abab"], "abab", &[(1, 0, 4)]), - t!(leftlong330, &["abcd", "b", "ce"], "abce", &[(1, 1, 2), (2, 2, 4),]), - t!(leftlong340, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (1, 3, 5)]), -]; - -/// Regression tests that are applied to all combinations. -/// -/// If regression tests are needed for specific match semantics, then add them -/// to the appropriate group above. 
-const REGRESSION: &'static [SearchTest] = &[ - t!(regression010, &["inf", "ind"], "infind", &[(0, 0, 3), (1, 3, 6),]), - t!(regression020, &["ind", "inf"], "infind", &[(1, 0, 3), (0, 3, 6),]), - t!( - regression030, - &["libcore/", "libstd/"], - "libcore/char/methods.rs", - &[(0, 0, 8),] - ), - t!( - regression040, - &["libstd/", "libcore/"], - "libcore/char/methods.rs", - &[(1, 0, 8),] - ), - t!( - regression050, - &["\x00\x00\x01", "\x00\x00\x00"], - "\x00\x00\x00", - &[(1, 0, 3),] - ), - t!( - regression060, - &["\x00\x00\x00", "\x00\x00\x01"], - "\x00\x00\x00", - &[(0, 0, 3),] - ), -]; - -const TEDDY: &'static [SearchTest] = &[ - t!( - teddy010, - &["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"], - "abcdefghijk", - &[ - (0, 0, 1), - (1, 1, 2), - (2, 2, 3), - (3, 3, 4), - (4, 4, 5), - (5, 5, 6), - (6, 6, 7), - (7, 7, 8), - (8, 8, 9), - (9, 9, 10), - (10, 10, 11) - ] - ), - t!( - teddy020, - &["ab", "bc", "cd", "de", "ef", "fg", "gh", "hi", "ij", "jk", "kl"], - "abcdefghijk", - &[(0, 0, 2), (2, 2, 4), (4, 4, 6), (6, 6, 8), (8, 8, 10),] - ), - t!( - teddy030, - &["abc"], - "abcdefghijklmnopqrstuvwxyzabcdefghijk", - &[(0, 0, 3), (0, 26, 29)] - ), -]; - -// Now define a test for each combination of things above that we want to run. -// Since there are a few different combinations for each collection of tests, -// we define a couple of macros to avoid repetition drudgery. The testconfig -// macro constructs the automaton from a given match kind, and runs the search -// tests one-by-one over the given collection. The `with` parameter allows one -// to configure the config with additional parameters. The testcombo macro -// invokes testconfig in precisely this way: it sets up several tests where -// each one turns a different knob on Config. - -macro_rules! 
testconfig { - ($name:ident, $collection:expr, $with:expr) => { - #[test] - fn $name() { - run_search_tests($collection, |test| { - let mut config = Config::new(); - $with(&mut config); - config - .builder() - .extend(test.patterns.iter().map(|p| p.as_bytes())) - .build() - .unwrap() - .find_iter(&test.haystack) - .collect() - }); - } - }; -} - -#[cfg(target_arch = "x86_64")] -testconfig!( - search_default_leftmost_first, - PACKED_LEFTMOST_FIRST, - |_: &mut Config| {} -); - -#[cfg(target_arch = "x86_64")] -testconfig!( - search_default_leftmost_longest, - PACKED_LEFTMOST_LONGEST, - |c: &mut Config| { - c.match_kind(MatchKind::LeftmostLongest); - } -); - -#[cfg(target_arch = "x86_64")] -testconfig!( - search_teddy_leftmost_first, - PACKED_LEFTMOST_FIRST, - |c: &mut Config| { - c.force_teddy(true); - } -); - -#[cfg(target_arch = "x86_64")] -testconfig!( - search_teddy_leftmost_longest, - PACKED_LEFTMOST_LONGEST, - |c: &mut Config| { - c.force_teddy(true).match_kind(MatchKind::LeftmostLongest); - } -); - -#[cfg(target_arch = "x86_64")] -testconfig!( - search_teddy_ssse3_leftmost_first, - PACKED_LEFTMOST_FIRST, - |c: &mut Config| { - c.force_teddy(true); - if is_x86_feature_detected!("ssse3") { - c.force_avx(Some(false)); - } - } -); - -#[cfg(target_arch = "x86_64")] -testconfig!( - search_teddy_ssse3_leftmost_longest, - PACKED_LEFTMOST_LONGEST, - |c: &mut Config| { - c.force_teddy(true).match_kind(MatchKind::LeftmostLongest); - if is_x86_feature_detected!("ssse3") { - c.force_avx(Some(false)); - } - } -); - -#[cfg(target_arch = "x86_64")] -testconfig!( - search_teddy_avx2_leftmost_first, - PACKED_LEFTMOST_FIRST, - |c: &mut Config| { - c.force_teddy(true); - if is_x86_feature_detected!("avx2") { - c.force_avx(Some(true)); - } - } -); - -#[cfg(target_arch = "x86_64")] -testconfig!( - search_teddy_avx2_leftmost_longest, - PACKED_LEFTMOST_LONGEST, - |c: &mut Config| { - c.force_teddy(true).match_kind(MatchKind::LeftmostLongest); - if is_x86_feature_detected!("avx2") { - 
c.force_avx(Some(true)); - } - } -); - -#[cfg(target_arch = "x86_64")] -testconfig!( - search_teddy_fat_leftmost_first, - PACKED_LEFTMOST_FIRST, - |c: &mut Config| { - c.force_teddy(true); - if is_x86_feature_detected!("avx2") { - c.force_teddy_fat(Some(true)); - } - } -); - -#[cfg(target_arch = "x86_64")] -testconfig!( - search_teddy_fat_leftmost_longest, - PACKED_LEFTMOST_LONGEST, - |c: &mut Config| { - c.force_teddy(true).match_kind(MatchKind::LeftmostLongest); - if is_x86_feature_detected!("avx2") { - c.force_teddy_fat(Some(true)); - } - } -); - -testconfig!( - search_rabinkarp_leftmost_first, - PACKED_LEFTMOST_FIRST, - |c: &mut Config| { - c.force_rabin_karp(true); - } -); - -testconfig!( - search_rabinkarp_leftmost_longest, - PACKED_LEFTMOST_LONGEST, - |c: &mut Config| { - c.force_rabin_karp(true).match_kind(MatchKind::LeftmostLongest); - } -); - -#[test] -fn search_tests_have_unique_names() { - let assert = |constname, tests: &[SearchTest]| { - let mut seen = HashMap::new(); // map from test name to position - for (i, test) in tests.iter().enumerate() { - if !seen.contains_key(test.name) { - seen.insert(test.name, i); - } else { - let last = seen[test.name]; - panic!( - "{} tests have duplicate names at positions {} and {}", - constname, last, i - ); - } - } - }; - assert("BASICS", BASICS); - assert("LEFTMOST", LEFTMOST); - assert("LEFTMOST_FIRST", LEFTMOST_FIRST); - assert("LEFTMOST_LONGEST", LEFTMOST_LONGEST); - assert("REGRESSION", REGRESSION); - assert("TEDDY", TEDDY); -} - -fn run_search_tests Vec>( - which: TestCollection, - mut f: F, -) { - let get_match_triples = - |matches: Vec| -> Vec<(usize, usize, usize)> { - matches - .into_iter() - .map(|m| (m.pattern(), m.start(), m.end())) - .collect() - }; - for &tests in which { - for spec in tests { - for test in spec.variations() { - assert_eq!( - test.matches, - get_match_triples(f(&test)).as_slice(), - "test: {}, patterns: {:?}, haystack: {:?}, offset: {:?}", - test.name, - test.patterns, - 
test.haystack, - test.offset, - ); - } - } - } -} diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/vector.rs clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/vector.rs --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/vector.rs 2023-02-13 06:00:43.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/packed/vector.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,181 +0,0 @@ -// This file contains a set of fairly generic utility functions when working -// with SIMD vectors. -// -// SAFETY: All of the routines below are unsafe to call because they assume -// the necessary CPU target features in order to use particular vendor -// intrinsics. Calling these routines when the underlying CPU does not support -// the appropriate target features is NOT safe. Callers must ensure this -// themselves. -// -// Note that it may not look like this safety invariant is being upheld when -// these routines are called. Namely, the CPU feature check is typically pretty -// far away from when these routines are used. Instead, we rely on the fact -// that certain types serve as a guaranteed receipt that pertinent target -// features are enabled. For example, the only way TeddySlim3Mask256 can be -// constructed is if the AVX2 CPU feature is available. Thus, any code running -// inside of TeddySlim3Mask256 can use any of the functions below without any -// additional checks: its very existence *is* the check. - -use std::arch::x86_64::*; - -/// Shift `a` to the left by two bytes (removing its two most significant -/// bytes), and concatenate it with the the two most significant bytes of `b`. 
-#[target_feature(enable = "avx2")] -pub unsafe fn alignr256_14(a: __m256i, b: __m256i) -> __m256i { - // Credit goes to jneem for figuring this out: - // https://github.com/jneem/teddy/blob/9ab5e899ad6ef6911aecd3cf1033f1abe6e1f66c/src/x86/teddy_simd.rs#L145-L184 - // - // TL;DR avx2's PALIGNR instruction is actually just two 128-bit PALIGNR - // instructions, which is not what we want, so we need to do some extra - // shuffling. - - // This permute gives us the low 16 bytes of a concatenated with the high - // 16 bytes of b, in order of most significant to least significant. So - // `v = a[15:0] b[31:16]`. - let v = _mm256_permute2x128_si256(b, a, 0x21); - // This effectively does this (where we deal in terms of byte-indexing - // and byte-shifting, and use inclusive ranges): - // - // ret[15:0] := ((a[15:0] << 16) | v[15:0]) >> 14 - // = ((a[15:0] << 16) | b[31:16]) >> 14 - // ret[31:16] := ((a[31:16] << 16) | v[31:16]) >> 14 - // = ((a[31:16] << 16) | a[15:0]) >> 14 - // - // Which therefore results in: - // - // ret[31:0] := a[29:16] a[15:14] a[13:0] b[31:30] - // - // The end result is that we've effectively done this: - // - // (a << 2) | (b >> 30) - // - // When `A` and `B` are strings---where the beginning of the string is in - // the least significant bits---we effectively result in the following - // semantic operation: - // - // (A >> 2) | (B << 30) - // - // The reversal being attributed to the fact that we are in little-endian. - _mm256_alignr_epi8(a, v, 14) -} - -/// Shift `a` to the left by one byte (removing its most significant byte), and -/// concatenate it with the the most significant byte of `b`. -#[target_feature(enable = "avx2")] -pub unsafe fn alignr256_15(a: __m256i, b: __m256i) -> __m256i { - // For explanation, see alignr256_14. - let v = _mm256_permute2x128_si256(b, a, 0x21); - _mm256_alignr_epi8(a, v, 15) -} - -/// Unpack the given 128-bit vector into its 64-bit components. 
The first -/// element of the array returned corresponds to the least significant 64-bit -/// lane in `a`. -#[target_feature(enable = "ssse3")] -pub unsafe fn unpack64x128(a: __m128i) -> [u64; 2] { - [ - _mm_cvtsi128_si64(a) as u64, - _mm_cvtsi128_si64(_mm_srli_si128(a, 8)) as u64, - ] -} - -/// Unpack the given 256-bit vector into its 64-bit components. The first -/// element of the array returned corresponds to the least significant 64-bit -/// lane in `a`. -#[target_feature(enable = "avx2")] -pub unsafe fn unpack64x256(a: __m256i) -> [u64; 4] { - // Using transmute here is precisely equivalent, but actually slower. It's - // not quite clear why. - let lo = _mm256_extracti128_si256(a, 0); - let hi = _mm256_extracti128_si256(a, 1); - [ - _mm_cvtsi128_si64(lo) as u64, - _mm_cvtsi128_si64(_mm_srli_si128(lo, 8)) as u64, - _mm_cvtsi128_si64(hi) as u64, - _mm_cvtsi128_si64(_mm_srli_si128(hi, 8)) as u64, - ] -} - -/// Unpack the low 128-bits of `a` and `b`, and return them as 4 64-bit -/// integers. -/// -/// More precisely, if a = a4 a3 a2 a1 and b = b4 b3 b2 b1, where each element -/// is a 64-bit integer and a1/b1 correspond to the least significant 64 bits, -/// then the return value is `b2 b1 a2 a1`. -#[target_feature(enable = "avx2")] -pub unsafe fn unpacklo64x256(a: __m256i, b: __m256i) -> [u64; 4] { - let lo = _mm256_castsi256_si128(a); - let hi = _mm256_castsi256_si128(b); - [ - _mm_cvtsi128_si64(lo) as u64, - _mm_cvtsi128_si64(_mm_srli_si128(lo, 8)) as u64, - _mm_cvtsi128_si64(hi) as u64, - _mm_cvtsi128_si64(_mm_srli_si128(hi, 8)) as u64, - ] -} - -/// Returns true if and only if all bits in the given 128-bit vector are 0. -#[target_feature(enable = "ssse3")] -pub unsafe fn is_all_zeroes128(a: __m128i) -> bool { - let cmp = _mm_cmpeq_epi8(a, zeroes128()); - _mm_movemask_epi8(cmp) as u32 == 0xFFFF -} - -/// Returns true if and only if all bits in the given 256-bit vector are 0. 
-#[target_feature(enable = "avx2")] -pub unsafe fn is_all_zeroes256(a: __m256i) -> bool { - let cmp = _mm256_cmpeq_epi8(a, zeroes256()); - _mm256_movemask_epi8(cmp) as u32 == 0xFFFFFFFF -} - -/// Load a 128-bit vector from slice at the given position. The slice does -/// not need to be unaligned. -/// -/// Since this code assumes little-endian (there is no big-endian x86), the -/// bytes starting in `slice[at..]` will be at the least significant bits of -/// the returned vector. This is important for the surrounding code, since for -/// example, shifting the resulting vector right is equivalent to logically -/// shifting the bytes in `slice` left. -#[target_feature(enable = "sse2")] -pub unsafe fn loadu128(slice: &[u8], at: usize) -> __m128i { - let ptr = slice.get_unchecked(at..).as_ptr(); - _mm_loadu_si128(ptr as *const u8 as *const __m128i) -} - -/// Load a 256-bit vector from slice at the given position. The slice does -/// not need to be unaligned. -/// -/// Since this code assumes little-endian (there is no big-endian x86), the -/// bytes starting in `slice[at..]` will be at the least significant bits of -/// the returned vector. This is important for the surrounding code, since for -/// example, shifting the resulting vector right is equivalent to logically -/// shifting the bytes in `slice` left. -#[target_feature(enable = "avx2")] -pub unsafe fn loadu256(slice: &[u8], at: usize) -> __m256i { - let ptr = slice.get_unchecked(at..).as_ptr(); - _mm256_loadu_si256(ptr as *const u8 as *const __m256i) -} - -/// Returns a 128-bit vector with all bits set to 0. -#[target_feature(enable = "sse2")] -pub unsafe fn zeroes128() -> __m128i { - _mm_set1_epi8(0) -} - -/// Returns a 256-bit vector with all bits set to 0. -#[target_feature(enable = "avx2")] -pub unsafe fn zeroes256() -> __m256i { - _mm256_set1_epi8(0) -} - -/// Returns a 128-bit vector with all bits set to 1. 
-#[target_feature(enable = "sse2")] -pub unsafe fn ones128() -> __m128i { - _mm_set1_epi8(0xFF as u8 as i8) -} - -/// Returns a 256-bit vector with all bits set to 1. -#[target_feature(enable = "avx2")] -pub unsafe fn ones256() -> __m256i { - _mm256_set1_epi8(0xFF as u8 as i8) -} diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/prefilter.rs clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/prefilter.rs --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/prefilter.rs 2023-02-13 06:00:43.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/prefilter.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,1057 +0,0 @@ -use std::cmp; -use std::fmt; -use std::panic::{RefUnwindSafe, UnwindSafe}; -use std::u8; - -use memchr::{memchr, memchr2, memchr3}; - -use crate::ahocorasick::MatchKind; -use crate::packed; -use crate::Match; - -/// A candidate is the result of running a prefilter on a haystack at a -/// particular position. The result is either no match, a confirmed match or -/// a possible match. -/// -/// When no match is returned, the prefilter is guaranteeing that no possible -/// match can be found in the haystack, and the caller may trust this. That is, -/// all correct prefilters must never report false negatives. -/// -/// In some cases, a prefilter can confirm a match very quickly, in which case, -/// the caller may use this to stop what it's doing and report the match. In -/// this case, prefilter implementations must never report a false positive. -/// In other cases, the prefilter can only report a potential match, in which -/// case the callers must attempt to confirm the match. In this case, prefilter -/// implementations are permitted to return false positives. -#[derive(Clone, Debug)] -pub enum Candidate { - None, - Match(Match), - PossibleStartOfMatch(usize), -} - -impl Candidate { - /// Convert this candidate into an option. 
This is useful when callers - /// do not distinguish between true positives and false positives (i.e., - /// the caller must always confirm the match in order to update some other - /// state). - pub fn into_option(self) -> Option { - match self { - Candidate::None => None, - Candidate::Match(ref m) => Some(m.start()), - Candidate::PossibleStartOfMatch(start) => Some(start), - } - } -} - -/// A prefilter describes the behavior of fast literal scanners for quickly -/// skipping past bytes in the haystack that we know cannot possibly -/// participate in a match. -pub trait Prefilter: - Send + Sync + RefUnwindSafe + UnwindSafe + fmt::Debug -{ - /// Returns the next possible match candidate. This may yield false - /// positives, so callers must confirm a match starting at the position - /// returned. This, however, must never produce false negatives. That is, - /// this must, at minimum, return the starting position of the next match - /// in the given haystack after or at the given position. - fn next_candidate( - &self, - state: &mut PrefilterState, - haystack: &[u8], - at: usize, - ) -> Candidate; - - /// A method for cloning a prefilter, to work-around the fact that Clone - /// is not object-safe. - fn clone_prefilter(&self) -> Box; - - /// Returns the approximate total amount of heap used by this prefilter, in - /// units of bytes. - fn heap_bytes(&self) -> usize; - - /// Returns true if and only if this prefilter never returns false - /// positives. This is useful for completely avoiding the automaton - /// when the prefilter can quickly confirm its own matches. - /// - /// By default, this returns true, which is conservative; it is always - /// correct to return `true`. Returning `false` here and reporting a false - /// positive will result in incorrect searches. - fn reports_false_positives(&self) -> bool { - true - } - - /// Returns true if and only if this prefilter may look for a non-starting - /// position of a match. 
- /// - /// This is useful in a streaming context where prefilters that don't look - /// for a starting position of a match can be quite difficult to deal with. - /// - /// This returns false by default. - fn looks_for_non_start_of_match(&self) -> bool { - false - } -} - -impl<'a, P: Prefilter + ?Sized> Prefilter for &'a P { - #[inline] - fn next_candidate( - &self, - state: &mut PrefilterState, - haystack: &[u8], - at: usize, - ) -> Candidate { - (**self).next_candidate(state, haystack, at) - } - - fn clone_prefilter(&self) -> Box { - (**self).clone_prefilter() - } - - fn heap_bytes(&self) -> usize { - (**self).heap_bytes() - } - - fn reports_false_positives(&self) -> bool { - (**self).reports_false_positives() - } -} - -/// A convenience object for representing any type that implements Prefilter -/// and is cloneable. -#[derive(Debug)] -pub struct PrefilterObj(Box); - -impl Clone for PrefilterObj { - fn clone(&self) -> Self { - PrefilterObj(self.0.clone_prefilter()) - } -} - -impl PrefilterObj { - /// Create a new prefilter object. - pub fn new(t: T) -> PrefilterObj { - PrefilterObj(Box::new(t)) - } - - /// Return the underlying prefilter trait object. - pub fn as_ref(&self) -> &dyn Prefilter { - &*self.0 - } -} - -/// PrefilterState tracks state associated with the effectiveness of a -/// prefilter. It is used to track how many bytes, on average, are skipped by -/// the prefilter. If this average dips below a certain threshold over time, -/// then the state renders the prefilter inert and stops using it. -/// -/// A prefilter state should be created for each search. (Where creating an -/// iterator via, e.g., `find_iter`, is treated as a single search.) -#[derive(Clone, Debug)] -pub struct PrefilterState { - /// The number of skips that has been executed. - skips: usize, - /// The total number of bytes that have been skipped. - skipped: usize, - /// The maximum length of a match. 
This is used to help determine how many - /// bytes on average should be skipped in order for a prefilter to be - /// effective. - max_match_len: usize, - /// Once this heuristic has been deemed permanently ineffective, it will be - /// inert throughout the rest of its lifetime. This serves as a cheap way - /// to check inertness. - inert: bool, - /// The last (absolute) position at which a prefilter scanned to. - /// Prefilters can use this position to determine whether to re-scan or - /// not. - /// - /// Unlike other things that impact effectiveness, this is a fleeting - /// condition. That is, a prefilter can be considered ineffective if it is - /// at a position before `last_scan_at`, but can become effective again - /// once the search moves past `last_scan_at`. - /// - /// The utility of this is to both avoid additional overhead from calling - /// the prefilter and to avoid quadratic behavior. This ensures that a - /// prefilter will scan any particular byte at most once. (Note that some - /// prefilters, like the start-byte prefilter, do not need to use this - /// field at all, since it only looks for starting bytes.) - last_scan_at: usize, -} - -impl PrefilterState { - /// The minimum number of skip attempts to try before considering whether - /// a prefilter is effective or not. - const MIN_SKIPS: usize = 40; - - /// The minimum amount of bytes that skipping must average, expressed as a - /// factor of the multiple of the length of a possible match. - /// - /// That is, after MIN_SKIPS have occurred, if the average number of bytes - /// skipped ever falls below MIN_AVG_FACTOR * max-match-length, then the - /// prefilter outed to be rendered inert. - const MIN_AVG_FACTOR: usize = 2; - - /// Create a fresh prefilter state. - pub fn new(max_match_len: usize) -> PrefilterState { - PrefilterState { - skips: 0, - skipped: 0, - max_match_len, - inert: false, - last_scan_at: 0, - } - } - - /// Create a prefilter state that always disables the prefilter. 
- pub fn disabled() -> PrefilterState { - PrefilterState { - skips: 0, - skipped: 0, - max_match_len: 0, - inert: true, - last_scan_at: 0, - } - } - - /// Update this state with the number of bytes skipped on the last - /// invocation of the prefilter. - #[inline] - fn update_skipped_bytes(&mut self, skipped: usize) { - self.skips += 1; - self.skipped += skipped; - } - - /// Updates the position at which the last scan stopped. This may be - /// greater than the position of the last candidate reported. For example, - /// searching for the "rare" byte `z` in `abczdef` for the pattern `abcz` - /// will report a candidate at position `0`, but the end of its last scan - /// will be at position `3`. - /// - /// This position factors into the effectiveness of this prefilter. If the - /// current position is less than the last position at which a scan ended, - /// then the prefilter should not be re-run until the search moves past - /// that position. - #[inline] - fn update_at(&mut self, at: usize) { - if at > self.last_scan_at { - self.last_scan_at = at; - } - } - - /// Return true if and only if this state indicates that a prefilter is - /// still effective. - /// - /// The given pos should correspond to the current starting position of the - /// search. - #[inline] - pub fn is_effective(&mut self, at: usize) -> bool { - if self.inert { - return false; - } - if at < self.last_scan_at { - return false; - } - if self.skips < PrefilterState::MIN_SKIPS { - return true; - } - - let min_avg = PrefilterState::MIN_AVG_FACTOR * self.max_match_len; - if self.skipped >= min_avg * self.skips { - return true; - } - - // We're inert. - self.inert = true; - false - } -} - -/// A builder for constructing the best possible prefilter. When constructed, -/// this builder will heuristically select the best prefilter it can build, -/// if any, and discard the rest. 
-#[derive(Debug)] -pub struct Builder { - count: usize, - ascii_case_insensitive: bool, - start_bytes: StartBytesBuilder, - rare_bytes: RareBytesBuilder, - packed: Option, -} - -impl Builder { - /// Create a new builder for constructing the best possible prefilter. - pub fn new(kind: MatchKind) -> Builder { - let pbuilder = kind - .as_packed() - .map(|kind| packed::Config::new().match_kind(kind).builder()); - Builder { - count: 0, - ascii_case_insensitive: false, - start_bytes: StartBytesBuilder::new(), - rare_bytes: RareBytesBuilder::new(), - packed: pbuilder, - } - } - - /// Enable ASCII case insensitivity. When set, byte strings added to this - /// builder will be interpreted without respect to ASCII case. - pub fn ascii_case_insensitive(mut self, yes: bool) -> Builder { - self.ascii_case_insensitive = yes; - self.start_bytes = self.start_bytes.ascii_case_insensitive(yes); - self.rare_bytes = self.rare_bytes.ascii_case_insensitive(yes); - self - } - - /// Return a prefilter suitable for quickly finding potential matches. - /// - /// All patterns added to an Aho-Corasick automaton should be added to this - /// builder before attempting to construct the prefilter. - pub fn build(&self) -> Option { - // match (self.start_bytes.build(), self.rare_bytes.build()) { - match (self.start_bytes.build(), self.rare_bytes.build()) { - // If we could build both start and rare prefilters, then there are - // a few cases in which we'd want to use the start-byte prefilter - // over the rare-byte prefilter, since the former has lower - // overhead. - (prestart @ Some(_), prerare @ Some(_)) => { - // If the start-byte prefilter can scan for a smaller number - // of bytes than the rare-byte prefilter, then it's probably - // faster. 
- let has_fewer_bytes = - self.start_bytes.count < self.rare_bytes.count; - // Otherwise, if the combined frequency rank of the detected - // bytes in the start-byte prefilter is "close" to the combined - // frequency rank of the rare-byte prefilter, then we pick - // the start-byte prefilter even if the rare-byte prefilter - // heuristically searches for rare bytes. This is because the - // rare-byte prefilter has higher constant costs, so we tend to - // prefer the start-byte prefilter when we can. - let has_rarer_bytes = - self.start_bytes.rank_sum <= self.rare_bytes.rank_sum + 50; - if has_fewer_bytes || has_rarer_bytes { - prestart - } else { - prerare - } - } - (prestart @ Some(_), None) => prestart, - (None, prerare @ Some(_)) => prerare, - (None, None) if self.ascii_case_insensitive => None, - (None, None) => self - .packed - .as_ref() - .and_then(|b| b.build()) - .map(|s| PrefilterObj::new(Packed(s))), - } - } - - /// Add a literal string to this prefilter builder. - pub fn add(&mut self, bytes: &[u8]) { - self.count += 1; - self.start_bytes.add(bytes); - self.rare_bytes.add(bytes); - if let Some(ref mut pbuilder) = self.packed { - pbuilder.add(bytes); - } - } -} - -/// A type that wraps a packed searcher and implements the `Prefilter` -/// interface. -#[derive(Clone, Debug)] -struct Packed(packed::Searcher); - -impl Prefilter for Packed { - fn next_candidate( - &self, - _state: &mut PrefilterState, - haystack: &[u8], - at: usize, - ) -> Candidate { - self.0.find_at(haystack, at).map_or(Candidate::None, Candidate::Match) - } - - fn clone_prefilter(&self) -> Box { - Box::new(self.clone()) - } - - fn heap_bytes(&self) -> usize { - self.0.heap_bytes() - } - - fn reports_false_positives(&self) -> bool { - false - } -} - -/// A builder for constructing a rare byte prefilter. -/// -/// A rare byte prefilter attempts to pick out a small set of rare bytes that -/// occurr in the patterns, and then quickly scan to matches of those rare -/// bytes. 
-#[derive(Clone, Debug)] -struct RareBytesBuilder { - /// Whether this prefilter should account for ASCII case insensitivity or - /// not. - ascii_case_insensitive: bool, - /// A set of rare bytes, indexed by byte value. - rare_set: ByteSet, - /// A set of byte offsets associated with bytes in a pattern. An entry - /// corresponds to a particular bytes (its index) and is only non-zero if - /// the byte occurred at an offset greater than 0 in at least one pattern. - /// - /// If a byte's offset is not representable in 8 bits, then the rare bytes - /// prefilter becomes inert. - byte_offsets: RareByteOffsets, - /// Whether this is available as a prefilter or not. This can be set to - /// false during construction if a condition is seen that invalidates the - /// use of the rare-byte prefilter. - available: bool, - /// The number of bytes set to an active value in `byte_offsets`. - count: usize, - /// The sum of frequency ranks for the rare bytes detected. This is - /// intended to give a heuristic notion of how rare the bytes are. - rank_sum: u16, -} - -/// A set of bytes. -#[derive(Clone, Copy)] -struct ByteSet([bool; 256]); - -impl ByteSet { - fn empty() -> ByteSet { - ByteSet([false; 256]) - } - - fn insert(&mut self, b: u8) -> bool { - let new = !self.contains(b); - self.0[b as usize] = true; - new - } - - fn contains(&self, b: u8) -> bool { - self.0[b as usize] - } -} - -impl fmt::Debug for ByteSet { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let mut bytes = vec![]; - for b in 0..=255 { - if self.contains(b) { - bytes.push(b); - } - } - f.debug_struct("ByteSet").field("set", &bytes).finish() - } -} - -/// A set of byte offsets, keyed by byte. -#[derive(Clone, Copy)] -struct RareByteOffsets { - /// Each entry corresponds to the maximum offset of the corresponding - /// byte across all patterns seen. - set: [RareByteOffset; 256], -} - -impl RareByteOffsets { - /// Create a new empty set of rare byte offsets. 
- pub fn empty() -> RareByteOffsets { - RareByteOffsets { set: [RareByteOffset::default(); 256] } - } - - /// Add the given offset for the given byte to this set. If the offset is - /// greater than the existing offset, then it overwrites the previous - /// value and returns false. If there is no previous value set, then this - /// sets it and returns true. - pub fn set(&mut self, byte: u8, off: RareByteOffset) { - self.set[byte as usize].max = - cmp::max(self.set[byte as usize].max, off.max); - } -} - -impl fmt::Debug for RareByteOffsets { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let mut offsets = vec![]; - for off in self.set.iter() { - if off.max > 0 { - offsets.push(off); - } - } - f.debug_struct("RareByteOffsets").field("set", &offsets).finish() - } -} - -/// Offsets associated with an occurrence of a "rare" byte in any of the -/// patterns used to construct a single Aho-Corasick automaton. -#[derive(Clone, Copy, Debug)] -struct RareByteOffset { - /// The maximum offset at which a particular byte occurs from the start - /// of any pattern. This is used as a shift amount. That is, when an - /// occurrence of this byte is found, the candidate position reported by - /// the prefilter is `position_of_byte - max`, such that the automaton - /// will begin its search at a position that is guaranteed to observe a - /// match. - /// - /// To avoid accidentally quadratic behavior, a prefilter is considered - /// ineffective when it is asked to start scanning from a position that it - /// has already scanned past. - /// - /// Using a `u8` here means that if we ever see a pattern that's longer - /// than 255 bytes, then the entire rare byte prefilter is disabled. - max: u8, -} - -impl Default for RareByteOffset { - fn default() -> RareByteOffset { - RareByteOffset { max: 0 } - } -} - -impl RareByteOffset { - /// Create a new rare byte offset. If the given offset is too big, then - /// None is returned. 
In that case, callers should render the rare bytes - /// prefilter inert. - fn new(max: usize) -> Option { - if max > u8::MAX as usize { - None - } else { - Some(RareByteOffset { max: max as u8 }) - } - } -} - -impl RareBytesBuilder { - /// Create a new builder for constructing a rare byte prefilter. - fn new() -> RareBytesBuilder { - RareBytesBuilder { - ascii_case_insensitive: false, - rare_set: ByteSet::empty(), - byte_offsets: RareByteOffsets::empty(), - available: true, - count: 0, - rank_sum: 0, - } - } - - /// Enable ASCII case insensitivity. When set, byte strings added to this - /// builder will be interpreted without respect to ASCII case. - fn ascii_case_insensitive(mut self, yes: bool) -> RareBytesBuilder { - self.ascii_case_insensitive = yes; - self - } - - /// Build the rare bytes prefilter. - /// - /// If there are more than 3 distinct starting bytes, or if heuristics - /// otherwise determine that this prefilter should not be used, then `None` - /// is returned. - fn build(&self) -> Option { - if !self.available || self.count > 3 { - return None; - } - let (mut bytes, mut len) = ([0; 3], 0); - for b in 0..=255 { - if self.rare_set.contains(b) { - bytes[len] = b as u8; - len += 1; - } - } - match len { - 0 => None, - 1 => Some(PrefilterObj::new(RareBytesOne { - byte1: bytes[0], - offset: self.byte_offsets.set[bytes[0] as usize], - })), - 2 => Some(PrefilterObj::new(RareBytesTwo { - offsets: self.byte_offsets, - byte1: bytes[0], - byte2: bytes[1], - })), - 3 => Some(PrefilterObj::new(RareBytesThree { - offsets: self.byte_offsets, - byte1: bytes[0], - byte2: bytes[1], - byte3: bytes[2], - })), - _ => unreachable!(), - } - } - - /// Add a byte string to this builder. - /// - /// All patterns added to an Aho-Corasick automaton should be added to this - /// builder before attempting to construct the prefilter. - fn add(&mut self, bytes: &[u8]) { - // If we've already given up, then do nothing. 
- if !self.available { - return; - } - // If we've already blown our budget, then don't waste time looking - // for more rare bytes. - if self.count > 3 { - self.available = false; - return; - } - // If the pattern is too long, then our offset table is bunk, so - // give up. - if bytes.len() >= 256 { - self.available = false; - return; - } - let mut rarest = match bytes.get(0) { - None => return, - Some(&b) => (b, freq_rank(b)), - }; - // The idea here is to look for the rarest byte in each pattern, and - // add that to our set. As a special exception, if we see a byte that - // we've already added, then we immediately stop and choose that byte, - // even if there's another rare byte in the pattern. This helps us - // apply the rare byte optimization in more cases by attempting to pick - // bytes that are in common between patterns. So for example, if we - // were searching for `Sherlock` and `lockjaw`, then this would pick - // `k` for both patterns, resulting in the use of `memchr` instead of - // `memchr2` for `k` and `j`. - let mut found = false; - for (pos, &b) in bytes.iter().enumerate() { - self.set_offset(pos, b); - if found { - continue; - } - if self.rare_set.contains(b) { - found = true; - continue; - } - let rank = freq_rank(b); - if rank < rarest.1 { - rarest = (b, rank); - } - } - if !found { - self.add_rare_byte(rarest.0); - } - } - - fn set_offset(&mut self, pos: usize, byte: u8) { - // This unwrap is OK because pos is never bigger than our max. 
- let offset = RareByteOffset::new(pos).unwrap(); - self.byte_offsets.set(byte, offset); - if self.ascii_case_insensitive { - self.byte_offsets.set(opposite_ascii_case(byte), offset); - } - } - - fn add_rare_byte(&mut self, byte: u8) { - self.add_one_rare_byte(byte); - if self.ascii_case_insensitive { - self.add_one_rare_byte(opposite_ascii_case(byte)); - } - } - - fn add_one_rare_byte(&mut self, byte: u8) { - if self.rare_set.insert(byte) { - self.count += 1; - self.rank_sum += freq_rank(byte) as u16; - } - } -} - -/// A prefilter for scanning for a single "rare" byte. -#[derive(Clone, Debug)] -struct RareBytesOne { - byte1: u8, - offset: RareByteOffset, -} - -impl Prefilter for RareBytesOne { - fn next_candidate( - &self, - state: &mut PrefilterState, - haystack: &[u8], - at: usize, - ) -> Candidate { - memchr(self.byte1, &haystack[at..]) - .map(|i| { - let pos = at + i; - state.last_scan_at = pos; - cmp::max(at, pos.saturating_sub(self.offset.max as usize)) - }) - .map_or(Candidate::None, Candidate::PossibleStartOfMatch) - } - - fn clone_prefilter(&self) -> Box { - Box::new(self.clone()) - } - - fn heap_bytes(&self) -> usize { - 0 - } - - fn looks_for_non_start_of_match(&self) -> bool { - // TODO: It should be possible to use a rare byte prefilter in a - // streaming context. The main problem is that we usually assume that - // if a prefilter has scanned some text and not found anything, then no - // match *starts* in that text. This doesn't matter in non-streaming - // contexts, but in a streaming context, if we're looking for a byte - // that doesn't start at the beginning of a match and don't find it, - // then it's still possible for a match to start at the end of the - // current buffer content. In order to fix this, the streaming searcher - // would need to become aware of prefilters that do this and use the - // appropriate offset in various places. 
It is quite a delicate change - // and probably shouldn't be attempted until streaming search has a - // better testing strategy. In particular, we'd really like to be able - // to vary the buffer size to force strange cases that occur at the - // edge of the buffer. If we make the buffer size minimal, then these - // cases occur more frequently and easier. - // - // This is also a bummer because this means that if the prefilter - // builder chose a rare byte prefilter, then a streaming search won't - // use any prefilter at all because the builder doesn't know how it's - // going to be used. Assuming we don't make streaming search aware of - // these special types of prefilters as described above, we could fix - // this by building a "backup" prefilter that could be used when the - // rare byte prefilter could not. But that's a bandaide. Sigh. - true - } -} - -/// A prefilter for scanning for two "rare" bytes. -#[derive(Clone, Debug)] -struct RareBytesTwo { - offsets: RareByteOffsets, - byte1: u8, - byte2: u8, -} - -impl Prefilter for RareBytesTwo { - fn next_candidate( - &self, - state: &mut PrefilterState, - haystack: &[u8], - at: usize, - ) -> Candidate { - memchr2(self.byte1, self.byte2, &haystack[at..]) - .map(|i| { - let pos = at + i; - state.update_at(pos); - let offset = self.offsets.set[haystack[pos] as usize].max; - cmp::max(at, pos.saturating_sub(offset as usize)) - }) - .map_or(Candidate::None, Candidate::PossibleStartOfMatch) - } - - fn clone_prefilter(&self) -> Box { - Box::new(self.clone()) - } - - fn heap_bytes(&self) -> usize { - 0 - } - - fn looks_for_non_start_of_match(&self) -> bool { - // TODO: See Prefilter impl for RareBytesOne. - true - } -} - -/// A prefilter for scanning for three "rare" bytes. 
-#[derive(Clone, Debug)] -struct RareBytesThree { - offsets: RareByteOffsets, - byte1: u8, - byte2: u8, - byte3: u8, -} - -impl Prefilter for RareBytesThree { - fn next_candidate( - &self, - state: &mut PrefilterState, - haystack: &[u8], - at: usize, - ) -> Candidate { - memchr3(self.byte1, self.byte2, self.byte3, &haystack[at..]) - .map(|i| { - let pos = at + i; - state.update_at(pos); - let offset = self.offsets.set[haystack[pos] as usize].max; - cmp::max(at, pos.saturating_sub(offset as usize)) - }) - .map_or(Candidate::None, Candidate::PossibleStartOfMatch) - } - - fn clone_prefilter(&self) -> Box { - Box::new(self.clone()) - } - - fn heap_bytes(&self) -> usize { - 0 - } - - fn looks_for_non_start_of_match(&self) -> bool { - // TODO: See Prefilter impl for RareBytesOne. - true - } -} - -/// A builder for constructing a starting byte prefilter. -/// -/// A starting byte prefilter is a simplistic prefilter that looks for possible -/// matches by reporting all positions corresponding to a particular byte. This -/// generally only takes affect when there are at most 3 distinct possible -/// starting bytes. e.g., the patterns `foo`, `bar`, and `baz` have two -/// distinct starting bytes (`f` and `b`), and this prefilter returns all -/// occurrences of either `f` or `b`. -/// -/// In some cases, a heuristic frequency analysis may determine that it would -/// be better not to use this prefilter even when there are 3 or fewer distinct -/// starting bytes. -#[derive(Clone, Debug)] -struct StartBytesBuilder { - /// Whether this prefilter should account for ASCII case insensitivity or - /// not. - ascii_case_insensitive: bool, - /// The set of starting bytes observed. - byteset: Vec, - /// The number of bytes set to true in `byteset`. - count: usize, - /// The sum of frequency ranks for the rare bytes detected. This is - /// intended to give a heuristic notion of how rare the bytes are. 
- rank_sum: u16, -} - -impl StartBytesBuilder { - /// Create a new builder for constructing a start byte prefilter. - fn new() -> StartBytesBuilder { - StartBytesBuilder { - ascii_case_insensitive: false, - byteset: vec![false; 256], - count: 0, - rank_sum: 0, - } - } - - /// Enable ASCII case insensitivity. When set, byte strings added to this - /// builder will be interpreted without respect to ASCII case. - fn ascii_case_insensitive(mut self, yes: bool) -> StartBytesBuilder { - self.ascii_case_insensitive = yes; - self - } - - /// Build the starting bytes prefilter. - /// - /// If there are more than 3 distinct starting bytes, or if heuristics - /// otherwise determine that this prefilter should not be used, then `None` - /// is returned. - fn build(&self) -> Option { - if self.count > 3 { - return None; - } - let (mut bytes, mut len) = ([0; 3], 0); - for b in 0..256 { - if !self.byteset[b] { - continue; - } - // We don't handle non-ASCII bytes for now. Getting non-ASCII - // bytes right is trickier, since we generally don't want to put - // a leading UTF-8 code unit into a prefilter that isn't ASCII, - // since they can frequently. Instead, it would be better to use a - // continuation byte, but this requires more sophisticated analysis - // of the automaton and a richer prefilter API. - if b > 0x7F { - return None; - } - bytes[len] = b as u8; - len += 1; - } - match len { - 0 => None, - 1 => Some(PrefilterObj::new(StartBytesOne { byte1: bytes[0] })), - 2 => Some(PrefilterObj::new(StartBytesTwo { - byte1: bytes[0], - byte2: bytes[1], - })), - 3 => Some(PrefilterObj::new(StartBytesThree { - byte1: bytes[0], - byte2: bytes[1], - byte3: bytes[2], - })), - _ => unreachable!(), - } - } - - /// Add a byte string to this builder. - /// - /// All patterns added to an Aho-Corasick automaton should be added to this - /// builder before attempting to construct the prefilter. 
- fn add(&mut self, bytes: &[u8]) { - if self.count > 3 { - return; - } - if let Some(&byte) = bytes.get(0) { - self.add_one_byte(byte); - if self.ascii_case_insensitive { - self.add_one_byte(opposite_ascii_case(byte)); - } - } - } - - fn add_one_byte(&mut self, byte: u8) { - if !self.byteset[byte as usize] { - self.byteset[byte as usize] = true; - self.count += 1; - self.rank_sum += freq_rank(byte) as u16; - } - } -} - -/// A prefilter for scanning for a single starting byte. -#[derive(Clone, Debug)] -struct StartBytesOne { - byte1: u8, -} - -impl Prefilter for StartBytesOne { - fn next_candidate( - &self, - _state: &mut PrefilterState, - haystack: &[u8], - at: usize, - ) -> Candidate { - memchr(self.byte1, &haystack[at..]) - .map(|i| at + i) - .map_or(Candidate::None, Candidate::PossibleStartOfMatch) - } - - fn clone_prefilter(&self) -> Box { - Box::new(self.clone()) - } - - fn heap_bytes(&self) -> usize { - 0 - } -} - -/// A prefilter for scanning for two starting bytes. -#[derive(Clone, Debug)] -struct StartBytesTwo { - byte1: u8, - byte2: u8, -} - -impl Prefilter for StartBytesTwo { - fn next_candidate( - &self, - _state: &mut PrefilterState, - haystack: &[u8], - at: usize, - ) -> Candidate { - memchr2(self.byte1, self.byte2, &haystack[at..]) - .map(|i| at + i) - .map_or(Candidate::None, Candidate::PossibleStartOfMatch) - } - - fn clone_prefilter(&self) -> Box { - Box::new(self.clone()) - } - - fn heap_bytes(&self) -> usize { - 0 - } -} - -/// A prefilter for scanning for three starting bytes. 
-#[derive(Clone, Debug)] -struct StartBytesThree { - byte1: u8, - byte2: u8, - byte3: u8, -} - -impl Prefilter for StartBytesThree { - fn next_candidate( - &self, - _state: &mut PrefilterState, - haystack: &[u8], - at: usize, - ) -> Candidate { - memchr3(self.byte1, self.byte2, self.byte3, &haystack[at..]) - .map(|i| at + i) - .map_or(Candidate::None, Candidate::PossibleStartOfMatch) - } - - fn clone_prefilter(&self) -> Box { - Box::new(self.clone()) - } - - fn heap_bytes(&self) -> usize { - 0 - } -} - -/// Return the next candidate reported by the given prefilter while -/// simultaneously updating the given prestate. -/// -/// The caller is responsible for checking the prestate before deciding whether -/// to initiate a search. -#[inline] -pub fn next( - prestate: &mut PrefilterState, - prefilter: P, - haystack: &[u8], - at: usize, -) -> Candidate { - let cand = prefilter.next_candidate(prestate, haystack, at); - match cand { - Candidate::None => { - prestate.update_skipped_bytes(haystack.len() - at); - } - Candidate::Match(ref m) => { - prestate.update_skipped_bytes(m.start() - at); - } - Candidate::PossibleStartOfMatch(i) => { - prestate.update_skipped_bytes(i - at); - } - } - cand -} - -/// If the given byte is an ASCII letter, then return it in the opposite case. -/// e.g., Given `b'A'`, this returns `b'a'`, and given `b'a'`, this returns -/// `b'A'`. If a non-ASCII letter is given, then the given byte is returned. -pub fn opposite_ascii_case(b: u8) -> u8 { - if b'A' <= b && b <= b'Z' { - b.to_ascii_lowercase() - } else if b'a' <= b && b <= b'z' { - b.to_ascii_uppercase() - } else { - b - } -} - -/// Return the frequency rank of the given byte. The higher the rank, the more -/// common the byte (heuristically speaking). 
-fn freq_rank(b: u8) -> u8 { - use crate::byte_frequencies::BYTE_FREQUENCIES; - BYTE_FREQUENCIES[b as usize] -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn scratch() { - let mut b = Builder::new(MatchKind::LeftmostFirst); - b.add(b"Sherlock"); - b.add(b"locjaw"); - // b.add(b"Sherlock"); - // b.add(b"Holmes"); - // b.add(b"Watson"); - // b.add("Шерлок Холмс".as_bytes()); - // b.add("Джон Уотсон".as_bytes()); - - let s = b.build().unwrap(); - println!("{:?}", s); - } -} diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/state_id.rs clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/state_id.rs --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/state_id.rs 2023-02-13 06:00:43.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/state_id.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,192 +0,0 @@ -use std::fmt::Debug; -use std::hash::Hash; - -use crate::error::{Error, Result}; - -// NOTE: Most of this code was copied from regex-automata, but without the -// (de)serialization specific stuff. - -/// Check that the premultiplication of the given state identifier can -/// fit into the representation indicated by `S`. If it cannot, or if it -/// overflows `usize` itself, then an error is returned. -pub fn premultiply_overflow_error( - last_state: S, - alphabet_len: usize, -) -> Result<()> { - let requested = match last_state.to_usize().checked_mul(alphabet_len) { - Some(requested) => requested, - None => return Err(Error::premultiply_overflow(0, 0)), - }; - if requested > S::max_id() { - return Err(Error::premultiply_overflow(S::max_id(), requested)); - } - Ok(()) -} - -/// Convert the given `usize` to the chosen state identifier -/// representation. If the given value cannot fit in the chosen -/// representation, then an error is returned. 
-pub fn usize_to_state_id(value: usize) -> Result { - if value > S::max_id() { - Err(Error::state_id_overflow(S::max_id())) - } else { - Ok(S::from_usize(value)) - } -} - -/// Return the unique identifier for an automaton's fail state in the chosen -/// representation indicated by `S`. -pub fn fail_id() -> S { - S::from_usize(0) -} - -/// Return the unique identifier for an automaton's fail state in the chosen -/// representation indicated by `S`. -pub fn dead_id() -> S { - S::from_usize(1) -} - -mod private { - /// Sealed stops crates other than aho-corasick from implementing any - /// traits that use it. - pub trait Sealed {} - impl Sealed for u8 {} - impl Sealed for u16 {} - impl Sealed for u32 {} - impl Sealed for u64 {} - impl Sealed for usize {} -} - -/// A trait describing the representation of an automaton's state identifier. -/// -/// The purpose of this trait is to safely express both the possible state -/// identifier representations that can be used in an automaton and to convert -/// between state identifier representations and types that can be used to -/// efficiently index memory (such as `usize`). -/// -/// In general, one should not need to implement this trait explicitly. Indeed, -/// for now, this trait is sealed such that it cannot be implemented by any -/// other type. In particular, this crate provides implementations for `u8`, -/// `u16`, `u32`, `u64` and `usize`. (`u32` and `u64` are only provided for -/// targets that can represent all corresponding values in a `usize`.) -pub trait StateID: - private::Sealed - + Clone - + Copy - + Debug - + Eq - + Hash - + PartialEq - + PartialOrd - + Ord -{ - /// Convert from a `usize` to this implementation's representation. - /// - /// Implementors may assume that `n <= Self::max_id`. That is, implementors - /// do not need to check whether `n` can fit inside this implementation's - /// representation. - fn from_usize(n: usize) -> Self; - - /// Convert this implementation's representation to a `usize`. 
- /// - /// Implementors must not return a `usize` value greater than - /// `Self::max_id` and must not permit overflow when converting between the - /// implementor's representation and `usize`. In general, the preferred - /// way for implementors to achieve this is to simply not provide - /// implementations of `StateID` that cannot fit into the target platform's - /// `usize`. - fn to_usize(self) -> usize; - - /// Return the maximum state identifier supported by this representation. - /// - /// Implementors must return a correct bound. Doing otherwise may result - /// in unspecified behavior (but will not violate memory safety). - fn max_id() -> usize; -} - -impl StateID for usize { - #[inline] - fn from_usize(n: usize) -> usize { - n - } - - #[inline] - fn to_usize(self) -> usize { - self - } - - #[inline] - fn max_id() -> usize { - ::std::usize::MAX - } -} - -impl StateID for u8 { - #[inline] - fn from_usize(n: usize) -> u8 { - n as u8 - } - - #[inline] - fn to_usize(self) -> usize { - self as usize - } - - #[inline] - fn max_id() -> usize { - ::std::u8::MAX as usize - } -} - -impl StateID for u16 { - #[inline] - fn from_usize(n: usize) -> u16 { - n as u16 - } - - #[inline] - fn to_usize(self) -> usize { - self as usize - } - - #[inline] - fn max_id() -> usize { - ::std::u16::MAX as usize - } -} - -#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] -impl StateID for u32 { - #[inline] - fn from_usize(n: usize) -> u32 { - n as u32 - } - - #[inline] - fn to_usize(self) -> usize { - self as usize - } - - #[inline] - fn max_id() -> usize { - ::std::u32::MAX as usize - } -} - -#[cfg(target_pointer_width = "64")] -impl StateID for u64 { - #[inline] - fn from_usize(n: usize) -> u64 { - n as u64 - } - - #[inline] - fn to_usize(self) -> usize { - self as usize - } - - #[inline] - fn max_id() -> usize { - ::std::u64::MAX as usize - } -} diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/tests.rs 
clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/tests.rs --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/tests.rs 2023-02-13 06:00:43.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/aho-corasick/src/tests.rs 1970-01-01 00:00:00.000000000 +0000 @@ -1,1254 +0,0 @@ -use std::collections::HashMap; -use std::io; -use std::usize; - -use crate::{AhoCorasickBuilder, Match, MatchKind}; - -/// A description of a single test against an Aho-Corasick automaton. -/// -/// A single test may not necessarily pass on every configuration of an -/// Aho-Corasick automaton. The tests are categorized and grouped appropriately -/// below. -#[derive(Clone, Debug, Eq, PartialEq)] -struct SearchTest { - /// The name of this test, for debugging. - name: &'static str, - /// The patterns to search for. - patterns: &'static [&'static str], - /// The text to search. - haystack: &'static str, - /// Each match is a triple of (pattern_index, start, end), where - /// pattern_index is an index into `patterns` and `start`/`end` are indices - /// into `haystack`. - matches: &'static [(usize, usize, usize)], -} - -/// Short-hand constructor for SearchTest. We use it a lot below. -macro_rules! t { - ($name:ident, $patterns:expr, $haystack:expr, $matches:expr) => { - SearchTest { - name: stringify!($name), - patterns: $patterns, - haystack: $haystack, - matches: $matches, - } - }; -} - -/// A collection of test groups. -type TestCollection = &'static [&'static [SearchTest]]; - -// Define several collections corresponding to the different type of match -// semantics supported by Aho-Corasick. These collections have some overlap, -// but each collection should have some tests that no other collection has. - -/// Tests for Aho-Corasick's standard non-overlapping match semantics. 
-const AC_STANDARD_NON_OVERLAPPING: TestCollection = - &[BASICS, NON_OVERLAPPING, STANDARD, REGRESSION]; - -/// Tests for Aho-Corasick's anchored standard non-overlapping match semantics. -const AC_STANDARD_ANCHORED_NON_OVERLAPPING: TestCollection = - &[ANCHORED_BASICS, ANCHORED_NON_OVERLAPPING, STANDARD_ANCHORED]; - -/// Tests for Aho-Corasick's standard overlapping match semantics. -const AC_STANDARD_OVERLAPPING: TestCollection = - &[BASICS, OVERLAPPING, REGRESSION]; - -/// Tests for Aho-Corasick's anchored standard overlapping match semantics. -const AC_STANDARD_ANCHORED_OVERLAPPING: TestCollection = - &[ANCHORED_BASICS, ANCHORED_OVERLAPPING]; - -/// Tests for Aho-Corasick's leftmost-first match semantics. -const AC_LEFTMOST_FIRST: TestCollection = - &[BASICS, NON_OVERLAPPING, LEFTMOST, LEFTMOST_FIRST, REGRESSION]; - -/// Tests for Aho-Corasick's anchored leftmost-first match semantics. -const AC_LEFTMOST_FIRST_ANCHORED: TestCollection = &[ - ANCHORED_BASICS, - ANCHORED_NON_OVERLAPPING, - ANCHORED_LEFTMOST, - ANCHORED_LEFTMOST_FIRST, -]; - -/// Tests for Aho-Corasick's leftmost-longest match semantics. -const AC_LEFTMOST_LONGEST: TestCollection = - &[BASICS, NON_OVERLAPPING, LEFTMOST, LEFTMOST_LONGEST, REGRESSION]; - -/// Tests for Aho-Corasick's anchored leftmost-longest match semantics. -const AC_LEFTMOST_LONGEST_ANCHORED: TestCollection = &[ - ANCHORED_BASICS, - ANCHORED_NON_OVERLAPPING, - ANCHORED_LEFTMOST, - ANCHORED_LEFTMOST_LONGEST, -]; - -// Now define the individual tests that make up the collections above. - -/// A collection of tests for the Aho-Corasick algorithm that should always be -/// true regardless of match semantics. That is, all combinations of -/// leftmost-{shortest, first, longest} x {overlapping, non-overlapping} -/// should produce the same answer. 
-const BASICS: &'static [SearchTest] = &[ - t!(basic000, &[], "", &[]), - t!(basic001, &["a"], "", &[]), - t!(basic010, &["a"], "a", &[(0, 0, 1)]), - t!(basic020, &["a"], "aa", &[(0, 0, 1), (0, 1, 2)]), - t!(basic030, &["a"], "aaa", &[(0, 0, 1), (0, 1, 2), (0, 2, 3)]), - t!(basic040, &["a"], "aba", &[(0, 0, 1), (0, 2, 3)]), - t!(basic050, &["a"], "bba", &[(0, 2, 3)]), - t!(basic060, &["a"], "bbb", &[]), - t!(basic070, &["a"], "bababbbba", &[(0, 1, 2), (0, 3, 4), (0, 8, 9)]), - t!(basic100, &["aa"], "", &[]), - t!(basic110, &["aa"], "aa", &[(0, 0, 2)]), - t!(basic120, &["aa"], "aabbaa", &[(0, 0, 2), (0, 4, 6)]), - t!(basic130, &["aa"], "abbab", &[]), - t!(basic140, &["aa"], "abbabaa", &[(0, 5, 7)]), - t!(basic200, &["abc"], "abc", &[(0, 0, 3)]), - t!(basic210, &["abc"], "zazabzabcz", &[(0, 6, 9)]), - t!(basic220, &["abc"], "zazabczabcz", &[(0, 3, 6), (0, 7, 10)]), - t!(basic300, &["a", "b"], "", &[]), - t!(basic310, &["a", "b"], "z", &[]), - t!(basic320, &["a", "b"], "b", &[(1, 0, 1)]), - t!(basic330, &["a", "b"], "a", &[(0, 0, 1)]), - t!( - basic340, - &["a", "b"], - "abba", - &[(0, 0, 1), (1, 1, 2), (1, 2, 3), (0, 3, 4),] - ), - t!( - basic350, - &["b", "a"], - "abba", - &[(1, 0, 1), (0, 1, 2), (0, 2, 3), (1, 3, 4),] - ), - t!(basic360, &["abc", "bc"], "xbc", &[(1, 1, 3),]), - t!(basic400, &["foo", "bar"], "", &[]), - t!(basic410, &["foo", "bar"], "foobar", &[(0, 0, 3), (1, 3, 6),]), - t!(basic420, &["foo", "bar"], "barfoo", &[(1, 0, 3), (0, 3, 6),]), - t!(basic430, &["foo", "bar"], "foofoo", &[(0, 0, 3), (0, 3, 6),]), - t!(basic440, &["foo", "bar"], "barbar", &[(1, 0, 3), (1, 3, 6),]), - t!(basic450, &["foo", "bar"], "bafofoo", &[(0, 4, 7),]), - t!(basic460, &["bar", "foo"], "bafofoo", &[(1, 4, 7),]), - t!(basic470, &["foo", "bar"], "fobabar", &[(1, 4, 7),]), - t!(basic480, &["bar", "foo"], "fobabar", &[(0, 4, 7),]), - t!(basic600, &[""], "", &[(0, 0, 0)]), - t!(basic610, &[""], "a", &[(0, 0, 0), (0, 1, 1)]), - t!(basic620, &[""], "abc", &[(0, 0, 0), (0, 1, 1), 
(0, 2, 2), (0, 3, 3)]), - t!(basic700, &["yabcdef", "abcdezghi"], "yabcdefghi", &[(0, 0, 7),]), - t!(basic710, &["yabcdef", "abcdezghi"], "yabcdezghi", &[(1, 1, 10),]), - t!( - basic720, - &["yabcdef", "bcdeyabc", "abcdezghi"], - "yabcdezghi", - &[(2, 1, 10),] - ), -]; - -/// A collection of *anchored* tests for the Aho-Corasick algorithm that should -/// always be true regardless of match semantics. That is, all combinations of -/// leftmost-{shortest, first, longest} x {overlapping, non-overlapping} should -/// produce the same answer. -const ANCHORED_BASICS: &'static [SearchTest] = &[ - t!(abasic000, &[], "", &[]), - t!(abasic010, &[""], "", &[(0, 0, 0)]), - t!(abasic020, &[""], "a", &[(0, 0, 0)]), - t!(abasic030, &[""], "abc", &[(0, 0, 0)]), - t!(abasic100, &["a"], "a", &[(0, 0, 1)]), - t!(abasic110, &["a"], "aa", &[(0, 0, 1)]), - t!(abasic120, &["a", "b"], "ab", &[(0, 0, 1)]), - t!(abasic130, &["a", "b"], "ba", &[(1, 0, 1)]), - t!(abasic140, &["foo", "foofoo"], "foo", &[(0, 0, 3)]), - t!(abasic150, &["foofoo", "foo"], "foo", &[(1, 0, 3)]), -]; - -/// Tests for non-overlapping standard match semantics. -/// -/// These tests generally shouldn't pass for leftmost-{first,longest}, although -/// some do in order to write clearer tests. For example, standard000 will -/// pass with leftmost-first semantics, but standard010 will not. We write -/// both to emphasize how the match semantics work. 
-const STANDARD: &'static [SearchTest] = &[ - t!(standard000, &["ab", "abcd"], "abcd", &[(0, 0, 2)]), - t!(standard010, &["abcd", "ab"], "abcd", &[(1, 0, 2)]), - t!(standard020, &["abcd", "ab", "abc"], "abcd", &[(1, 0, 2)]), - t!(standard030, &["abcd", "abc", "ab"], "abcd", &[(2, 0, 2)]), - t!(standard040, &["a", ""], "a", &[(1, 0, 0), (1, 1, 1)]), - t!( - standard400, - &["abcd", "bcd", "cd", "b"], - "abcd", - &[(3, 1, 2), (2, 2, 4),] - ), - t!(standard410, &["", "a"], "a", &[(0, 0, 0), (0, 1, 1),]), - t!(standard420, &["", "a"], "aa", &[(0, 0, 0), (0, 1, 1), (0, 2, 2),]), - t!(standard430, &["", "a", ""], "a", &[(0, 0, 0), (0, 1, 1),]), - t!(standard440, &["a", "", ""], "a", &[(1, 0, 0), (1, 1, 1),]), - t!(standard450, &["", "", "a"], "a", &[(0, 0, 0), (0, 1, 1),]), -]; - -/// Like STANDARD, but for anchored searches. -const STANDARD_ANCHORED: &'static [SearchTest] = &[ - t!(astandard000, &["ab", "abcd"], "abcd", &[(0, 0, 2)]), - t!(astandard010, &["abcd", "ab"], "abcd", &[(1, 0, 2)]), - t!(astandard020, &["abcd", "ab", "abc"], "abcd", &[(1, 0, 2)]), - t!(astandard030, &["abcd", "abc", "ab"], "abcd", &[(2, 0, 2)]), - t!(astandard040, &["a", ""], "a", &[(1, 0, 0)]), - t!(astandard050, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4)]), - t!(astandard410, &["", "a"], "a", &[(0, 0, 0)]), - t!(astandard420, &["", "a"], "aa", &[(0, 0, 0)]), - t!(astandard430, &["", "a", ""], "a", &[(0, 0, 0)]), - t!(astandard440, &["a", "", ""], "a", &[(1, 0, 0)]), - t!(astandard450, &["", "", "a"], "a", &[(0, 0, 0)]), -]; - -/// Tests for non-overlapping leftmost match semantics. These should pass for -/// both leftmost-first and leftmost-longest match kinds. Stated differently, -/// among ambiguous matches, the longest match and the match that appeared -/// first when constructing the automaton should always be the same. 
-const LEFTMOST: &'static [SearchTest] = &[ - t!(leftmost000, &["ab", "ab"], "abcd", &[(0, 0, 2)]), - t!(leftmost010, &["a", ""], "a", &[(0, 0, 1), (1, 1, 1)]), - t!(leftmost020, &["", ""], "a", &[(0, 0, 0), (0, 1, 1)]), - t!(leftmost030, &["a", "ab"], "aa", &[(0, 0, 1), (0, 1, 2)]), - t!(leftmost031, &["ab", "a"], "aa", &[(1, 0, 1), (1, 1, 2)]), - t!(leftmost032, &["ab", "a"], "xayabbbz", &[(1, 1, 2), (0, 3, 5)]), - t!(leftmost300, &["abcd", "bce", "b"], "abce", &[(1, 1, 4)]), - t!(leftmost310, &["abcd", "ce", "bc"], "abce", &[(2, 1, 3)]), - t!(leftmost320, &["abcd", "bce", "ce", "b"], "abce", &[(1, 1, 4)]), - t!(leftmost330, &["abcd", "bce", "cz", "bc"], "abcz", &[(3, 1, 3)]), - t!(leftmost340, &["bce", "cz", "bc"], "bcz", &[(2, 0, 2)]), - t!(leftmost350, &["abc", "bd", "ab"], "abd", &[(2, 0, 2)]), - t!( - leftmost360, - &["abcdefghi", "hz", "abcdefgh"], - "abcdefghz", - &[(2, 0, 8),] - ), - t!( - leftmost370, - &["abcdefghi", "cde", "hz", "abcdefgh"], - "abcdefghz", - &[(3, 0, 8),] - ), - t!( - leftmost380, - &["abcdefghi", "hz", "abcdefgh", "a"], - "abcdefghz", - &[(2, 0, 8),] - ), - t!( - leftmost390, - &["b", "abcdefghi", "hz", "abcdefgh"], - "abcdefghz", - &[(3, 0, 8),] - ), - t!( - leftmost400, - &["h", "abcdefghi", "hz", "abcdefgh"], - "abcdefghz", - &[(3, 0, 8),] - ), - t!( - leftmost410, - &["z", "abcdefghi", "hz", "abcdefgh"], - "abcdefghz", - &[(3, 0, 8), (0, 8, 9),] - ), -]; - -/// Like LEFTMOST, but for anchored searches. 
-const ANCHORED_LEFTMOST: &'static [SearchTest] = &[ - t!(aleftmost000, &["ab", "ab"], "abcd", &[(0, 0, 2)]), - t!(aleftmost010, &["a", ""], "a", &[(0, 0, 1)]), - t!(aleftmost020, &["", ""], "a", &[(0, 0, 0)]), - t!(aleftmost030, &["a", "ab"], "aa", &[(0, 0, 1)]), - t!(aleftmost031, &["ab", "a"], "aa", &[(1, 0, 1)]), - t!(aleftmost032, &["ab", "a"], "xayabbbz", &[]), - t!(aleftmost300, &["abcd", "bce", "b"], "abce", &[]), - t!(aleftmost310, &["abcd", "ce", "bc"], "abce", &[]), - t!(aleftmost320, &["abcd", "bce", "ce", "b"], "abce", &[]), - t!(aleftmost330, &["abcd", "bce", "cz", "bc"], "abcz", &[]), - t!(aleftmost340, &["bce", "cz", "bc"], "bcz", &[(2, 0, 2)]), - t!(aleftmost350, &["abc", "bd", "ab"], "abd", &[(2, 0, 2)]), - t!( - aleftmost360, - &["abcdefghi", "hz", "abcdefgh"], - "abcdefghz", - &[(2, 0, 8),] - ), - t!( - aleftmost370, - &["abcdefghi", "cde", "hz", "abcdefgh"], - "abcdefghz", - &[(3, 0, 8),] - ), - t!( - aleftmost380, - &["abcdefghi", "hz", "abcdefgh", "a"], - "abcdefghz", - &[(2, 0, 8),] - ), - t!( - aleftmost390, - &["b", "abcdefghi", "hz", "abcdefgh"], - "abcdefghz", - &[(3, 0, 8),] - ), - t!( - aleftmost400, - &["h", "abcdefghi", "hz", "abcdefgh"], - "abcdefghz", - &[(3, 0, 8),] - ), - t!( - aleftmost410, - &["z", "abcdefghi", "hz", "abcdefgh"], - "abcdefghz", - &[(3, 0, 8)] - ), -]; - -/// Tests for non-overlapping leftmost-first match semantics. These tests -/// should generally be specific to leftmost-first, which means they should -/// generally fail under leftmost-longest semantics. 
-const LEFTMOST_FIRST: &'static [SearchTest] = &[ - t!(leftfirst000, &["ab", "abcd"], "abcd", &[(0, 0, 2)]), - t!(leftfirst010, &["", "a"], "a", &[(0, 0, 0), (0, 1, 1)]), - t!(leftfirst011, &["", "a", ""], "a", &[(0, 0, 0), (0, 1, 1),]), - t!(leftfirst012, &["a", "", ""], "a", &[(0, 0, 1), (1, 1, 1),]), - t!(leftfirst013, &["", "", "a"], "a", &[(0, 0, 0), (0, 1, 1),]), - t!(leftfirst020, &["abcd", "ab"], "abcd", &[(0, 0, 4)]), - t!(leftfirst030, &["ab", "ab"], "abcd", &[(0, 0, 2)]), - t!(leftfirst040, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (0, 3, 4)]), - t!(leftfirst100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(1, 1, 5)]), - t!(leftfirst110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]), - t!(leftfirst300, &["abcd", "b", "bce"], "abce", &[(1, 1, 2)]), - t!( - leftfirst310, - &["abcd", "b", "bce", "ce"], - "abce", - &[(1, 1, 2), (3, 2, 4),] - ), - t!( - leftfirst320, - &["a", "abcdefghi", "hz", "abcdefgh"], - "abcdefghz", - &[(0, 0, 1), (2, 7, 9),] - ), - t!(leftfirst330, &["a", "abab"], "abab", &[(0, 0, 1), (0, 2, 3)]), - t!(leftfirst400, &["amwix", "samwise", "sam"], "Zsamwix", &[(2, 1, 4)]), -]; - -/// Like LEFTMOST_FIRST, but for anchored searches. 
-const ANCHORED_LEFTMOST_FIRST: &'static [SearchTest] = &[ - t!(aleftfirst000, &["ab", "abcd"], "abcd", &[(0, 0, 2)]), - t!(aleftfirst010, &["", "a"], "a", &[(0, 0, 0)]), - t!(aleftfirst011, &["", "a", ""], "a", &[(0, 0, 0)]), - t!(aleftfirst012, &["a", "", ""], "a", &[(0, 0, 1)]), - t!(aleftfirst013, &["", "", "a"], "a", &[(0, 0, 0)]), - t!(aleftfirst020, &["abcd", "ab"], "abcd", &[(0, 0, 4)]), - t!(aleftfirst030, &["ab", "ab"], "abcd", &[(0, 0, 2)]), - t!(aleftfirst040, &["a", "ab"], "xayabbbz", &[]), - t!(aleftfirst100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[]), - t!(aleftfirst110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[]), - t!(aleftfirst300, &["abcd", "b", "bce"], "abce", &[]), - t!(aleftfirst310, &["abcd", "b", "bce", "ce"], "abce", &[]), - t!( - aleftfirst320, - &["a", "abcdefghi", "hz", "abcdefgh"], - "abcdefghz", - &[(0, 0, 1)] - ), - t!(aleftfirst330, &["a", "abab"], "abab", &[(0, 0, 1)]), - t!(aleftfirst400, &["wise", "samwise", "sam"], "samwix", &[(2, 0, 3)]), -]; - -/// Tests for non-overlapping leftmost-longest match semantics. These tests -/// should generally be specific to leftmost-longest, which means they should -/// generally fail under leftmost-first semantics. 
-const LEFTMOST_LONGEST: &'static [SearchTest] = &[ - t!(leftlong000, &["ab", "abcd"], "abcd", &[(1, 0, 4)]), - t!(leftlong010, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4),]), - t!(leftlong020, &["", "a"], "a", &[(1, 0, 1), (0, 1, 1),]), - t!(leftlong021, &["", "a", ""], "a", &[(1, 0, 1), (0, 1, 1),]), - t!(leftlong022, &["a", "", ""], "a", &[(0, 0, 1), (1, 1, 1),]), - t!(leftlong023, &["", "", "a"], "a", &[(2, 0, 1), (0, 1, 1),]), - t!(leftlong030, &["", "a"], "aa", &[(1, 0, 1), (1, 1, 2), (0, 2, 2),]), - t!(leftlong040, &["a", "ab"], "a", &[(0, 0, 1)]), - t!(leftlong050, &["a", "ab"], "ab", &[(1, 0, 2)]), - t!(leftlong060, &["ab", "a"], "a", &[(1, 0, 1)]), - t!(leftlong070, &["ab", "a"], "ab", &[(0, 0, 2)]), - t!(leftlong100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(2, 1, 6)]), - t!(leftlong110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]), - t!(leftlong300, &["abcd", "b", "bce"], "abce", &[(2, 1, 4)]), - t!( - leftlong310, - &["a", "abcdefghi", "hz", "abcdefgh"], - "abcdefghz", - &[(3, 0, 8),] - ), - t!(leftlong320, &["a", "abab"], "abab", &[(1, 0, 4)]), - t!(leftlong330, &["abcd", "b", "ce"], "abce", &[(1, 1, 2), (2, 2, 4),]), - t!(leftlong340, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (1, 3, 5)]), -]; - -/// Like LEFTMOST_LONGEST, but for anchored searches. 
-const ANCHORED_LEFTMOST_LONGEST: &'static [SearchTest] = &[ - t!(aleftlong000, &["ab", "abcd"], "abcd", &[(1, 0, 4)]), - t!(aleftlong010, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4),]), - t!(aleftlong020, &["", "a"], "a", &[(1, 0, 1)]), - t!(aleftlong021, &["", "a", ""], "a", &[(1, 0, 1)]), - t!(aleftlong022, &["a", "", ""], "a", &[(0, 0, 1)]), - t!(aleftlong023, &["", "", "a"], "a", &[(2, 0, 1)]), - t!(aleftlong030, &["", "a"], "aa", &[(1, 0, 1)]), - t!(aleftlong040, &["a", "ab"], "a", &[(0, 0, 1)]), - t!(aleftlong050, &["a", "ab"], "ab", &[(1, 0, 2)]), - t!(aleftlong060, &["ab", "a"], "a", &[(1, 0, 1)]), - t!(aleftlong070, &["ab", "a"], "ab", &[(0, 0, 2)]), - t!(aleftlong100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[]), - t!(aleftlong110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[]), - t!(aleftlong300, &["abcd", "b", "bce"], "abce", &[]), - t!( - aleftlong310, - &["a", "abcdefghi", "hz", "abcdefgh"], - "abcdefghz", - &[(3, 0, 8),] - ), - t!(aleftlong320, &["a", "abab"], "abab", &[(1, 0, 4)]), - t!(aleftlong330, &["abcd", "b", "ce"], "abce", &[]), - t!(aleftlong340, &["a", "ab"], "xayabbbz", &[]), -]; - -/// Tests for non-overlapping match semantics. -/// -/// Generally these tests shouldn't pass when using overlapping semantics. -/// These should pass for both standard and leftmost match semantics. -const NON_OVERLAPPING: &'static [SearchTest] = &[ - t!(nover010, &["abcd", "bcd", "cd"], "abcd", &[(0, 0, 4),]), - t!(nover020, &["bcd", "cd", "abcd"], "abcd", &[(2, 0, 4),]), - t!(nover030, &["abc", "bc"], "zazabcz", &[(0, 3, 6),]), - t!( - nover100, - &["ab", "ba"], - "abababa", - &[(0, 0, 2), (0, 2, 4), (0, 4, 6),] - ), - t!(nover200, &["foo", "foo"], "foobarfoo", &[(0, 0, 3), (0, 6, 9),]), - t!(nover300, &["", ""], "", &[(0, 0, 0),]), - t!(nover310, &["", ""], "a", &[(0, 0, 0), (0, 1, 1),]), -]; - -/// Like NON_OVERLAPPING, but for anchored searches. 
-const ANCHORED_NON_OVERLAPPING: &'static [SearchTest] = &[ - t!(anover010, &["abcd", "bcd", "cd"], "abcd", &[(0, 0, 4),]), - t!(anover020, &["bcd", "cd", "abcd"], "abcd", &[(2, 0, 4),]), - t!(anover030, &["abc", "bc"], "zazabcz", &[]), - t!(anover100, &["ab", "ba"], "abababa", &[(0, 0, 2)]), - t!(anover200, &["foo", "foo"], "foobarfoo", &[(0, 0, 3)]), - t!(anover300, &["", ""], "", &[(0, 0, 0),]), - t!(anover310, &["", ""], "a", &[(0, 0, 0)]), -]; - -/// Tests for overlapping match semantics. -/// -/// This only supports standard match semantics, since leftmost-{first,longest} -/// do not support overlapping matches. -const OVERLAPPING: &'static [SearchTest] = &[ - t!( - over000, - &["abcd", "bcd", "cd", "b"], - "abcd", - &[(3, 1, 2), (0, 0, 4), (1, 1, 4), (2, 2, 4),] - ), - t!( - over010, - &["bcd", "cd", "b", "abcd"], - "abcd", - &[(2, 1, 2), (3, 0, 4), (0, 1, 4), (1, 2, 4),] - ), - t!( - over020, - &["abcd", "bcd", "cd"], - "abcd", - &[(0, 0, 4), (1, 1, 4), (2, 2, 4),] - ), - t!( - over030, - &["bcd", "abcd", "cd"], - "abcd", - &[(1, 0, 4), (0, 1, 4), (2, 2, 4),] - ), - t!( - over040, - &["bcd", "cd", "abcd"], - "abcd", - &[(2, 0, 4), (0, 1, 4), (1, 2, 4),] - ), - t!(over050, &["abc", "bc"], "zazabcz", &[(0, 3, 6), (1, 4, 6),]), - t!( - over100, - &["ab", "ba"], - "abababa", - &[(0, 0, 2), (1, 1, 3), (0, 2, 4), (1, 3, 5), (0, 4, 6), (1, 5, 7),] - ), - t!( - over200, - &["foo", "foo"], - "foobarfoo", - &[(0, 0, 3), (1, 0, 3), (0, 6, 9), (1, 6, 9),] - ), - t!(over300, &["", ""], "", &[(0, 0, 0), (1, 0, 0),]), - t!( - over310, - &["", ""], - "a", - &[(0, 0, 0), (1, 0, 0), (0, 1, 1), (1, 1, 1),] - ), - t!(over320, &["", "a"], "a", &[(0, 0, 0), (1, 0, 1), (0, 1, 1),]), - t!( - over330, - &["", "a", ""], - "a", - &[(0, 0, 0), (2, 0, 0), (1, 0, 1), (0, 1, 1), (2, 1, 1),] - ), - t!( - over340, - &["a", "", ""], - "a", - &[(1, 0, 0), (2, 0, 0), (0, 0, 1), (1, 1, 1), (2, 1, 1),] - ), - t!( - over350, - &["", "", "a"], - "a", - &[(0, 0, 0), (1, 0, 0), (2, 0, 1), (0, 1, 
1), (1, 1, 1),] - ), - t!( - over360, - &["foo", "foofoo"], - "foofoo", - &[(0, 0, 3), (1, 0, 6), (0, 3, 6)] - ), -]; - -/// Like OVERLAPPING, but for anchored searches. -const ANCHORED_OVERLAPPING: &'static [SearchTest] = &[ - t!(aover000, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4)]), - t!(aover010, &["bcd", "cd", "b", "abcd"], "abcd", &[(3, 0, 4)]), - t!(aover020, &["abcd", "bcd", "cd"], "abcd", &[(0, 0, 4)]), - t!(aover030, &["bcd", "abcd", "cd"], "abcd", &[(1, 0, 4)]), - t!(aover040, &["bcd", "cd", "abcd"], "abcd", &[(2, 0, 4)]), - t!(aover050, &["abc", "bc"], "zazabcz", &[]), - t!(aover100, &["ab", "ba"], "abababa", &[(0, 0, 2)]), - t!(aover200, &["foo", "foo"], "foobarfoo", &[(0, 0, 3), (1, 0, 3)]), - t!(aover300, &["", ""], "", &[(0, 0, 0), (1, 0, 0),]), - t!(aover310, &["", ""], "a", &[(0, 0, 0), (1, 0, 0)]), - t!(aover320, &["", "a"], "a", &[(0, 0, 0), (1, 0, 1)]), - t!(aover330, &["", "a", ""], "a", &[(0, 0, 0), (2, 0, 0), (1, 0, 1)]), - t!(aover340, &["a", "", ""], "a", &[(1, 0, 0), (2, 0, 0), (0, 0, 1)]), - t!(aover350, &["", "", "a"], "a", &[(0, 0, 0), (1, 0, 0), (2, 0, 1)]), - t!(aover360, &["foo", "foofoo"], "foofoo", &[(0, 0, 3), (1, 0, 6)]), -]; - -/// Tests for ASCII case insensitivity. -/// -/// These tests should all have the same behavior regardless of match semantics -/// or whether the search is overlapping. -const ASCII_CASE_INSENSITIVE: &'static [SearchTest] = &[ - t!(acasei000, &["a"], "A", &[(0, 0, 1)]), - t!(acasei010, &["Samwise"], "SAMWISE", &[(0, 0, 7)]), - t!(acasei011, &["Samwise"], "SAMWISE.abcd", &[(0, 0, 7)]), - t!(acasei020, &["fOoBaR"], "quux foobar baz", &[(0, 5, 11)]), -]; - -/// Like ASCII_CASE_INSENSITIVE, but specifically for non-overlapping tests. 
-const ASCII_CASE_INSENSITIVE_NON_OVERLAPPING: &'static [SearchTest] = &[ - t!(acasei000, &["foo", "FOO"], "fOo", &[(0, 0, 3)]), - t!(acasei000, &["FOO", "foo"], "fOo", &[(0, 0, 3)]), - t!(acasei010, &["abc", "def"], "abcdef", &[(0, 0, 3), (1, 3, 6)]), -]; - -/// Like ASCII_CASE_INSENSITIVE, but specifically for overlapping tests. -const ASCII_CASE_INSENSITIVE_OVERLAPPING: &'static [SearchTest] = &[ - t!(acasei000, &["foo", "FOO"], "fOo", &[(0, 0, 3), (1, 0, 3)]), - t!(acasei001, &["FOO", "foo"], "fOo", &[(0, 0, 3), (1, 0, 3)]), - // This is a regression test from: - // https://github.com/BurntSushi/aho-corasick/issues/68 - // Previously, it was reporting a duplicate (1, 3, 6) match. - t!( - acasei010, - &["abc", "def", "abcdef"], - "abcdef", - &[(0, 0, 3), (2, 0, 6), (1, 3, 6)] - ), -]; - -/// Regression tests that are applied to all Aho-Corasick combinations. -/// -/// If regression tests are needed for specific match semantics, then add them -/// to the appropriate group above. -const REGRESSION: &'static [SearchTest] = &[ - t!(regression010, &["inf", "ind"], "infind", &[(0, 0, 3), (1, 3, 6),]), - t!(regression020, &["ind", "inf"], "infind", &[(1, 0, 3), (0, 3, 6),]), - t!( - regression030, - &["libcore/", "libstd/"], - "libcore/char/methods.rs", - &[(0, 0, 8),] - ), - t!( - regression040, - &["libstd/", "libcore/"], - "libcore/char/methods.rs", - &[(1, 0, 8),] - ), - t!( - regression050, - &["\x00\x00\x01", "\x00\x00\x00"], - "\x00\x00\x00", - &[(1, 0, 3),] - ), - t!( - regression060, - &["\x00\x00\x00", "\x00\x00\x01"], - "\x00\x00\x00", - &[(0, 0, 3),] - ), -]; - -// Now define a test for each combination of things above that we want to run. -// Since there are a few different combinations for each collection of tests, -// we define a couple of macros to avoid repetition drudgery. The testconfig -// macro constructs the automaton from a given match kind, and runs the search -// tests one-by-one over the given collection. 
The `with` parameter allows one -// to configure the builder with additional parameters. The testcombo macro -// invokes testconfig in precisely this way: it sets up several tests where -// each one turns a different knob on AhoCorasickBuilder. - -macro_rules! testconfig { - (overlapping, $name:ident, $collection:expr, $kind:ident, $with:expr) => { - #[test] - fn $name() { - run_search_tests($collection, |test| { - let mut builder = AhoCorasickBuilder::new(); - $with(&mut builder); - builder - .match_kind(MatchKind::$kind) - .build(test.patterns) - .find_overlapping_iter(test.haystack) - .collect() - }); - } - }; - (stream, $name:ident, $collection:expr, $kind:ident, $with:expr) => { - #[test] - fn $name() { - run_search_tests($collection, |test| { - let buf = - io::BufReader::with_capacity(1, test.haystack.as_bytes()); - let mut builder = AhoCorasickBuilder::new(); - $with(&mut builder); - builder - .match_kind(MatchKind::$kind) - .build(test.patterns) - .stream_find_iter(buf) - .map(|result| result.unwrap()) - .collect() - }); - } - }; - ($name:ident, $collection:expr, $kind:ident, $with:expr) => { - #[test] - fn $name() { - run_search_tests($collection, |test| { - let mut builder = AhoCorasickBuilder::new(); - $with(&mut builder); - builder - .match_kind(MatchKind::$kind) - .build(test.patterns) - .find_iter(test.haystack) - .collect() - }); - } - }; -} - -macro_rules! 
testcombo { - ($name:ident, $collection:expr, $kind:ident) => { - mod $name { - use super::*; - - testconfig!(nfa_default, $collection, $kind, |_| ()); - testconfig!( - nfa_no_prefilter, - $collection, - $kind, - |b: &mut AhoCorasickBuilder| { - b.prefilter(false); - } - ); - testconfig!( - nfa_all_sparse, - $collection, - $kind, - |b: &mut AhoCorasickBuilder| { - b.dense_depth(0); - } - ); - testconfig!( - nfa_all_dense, - $collection, - $kind, - |b: &mut AhoCorasickBuilder| { - b.dense_depth(usize::MAX); - } - ); - testconfig!( - dfa_default, - $collection, - $kind, - |b: &mut AhoCorasickBuilder| { - b.dfa(true); - } - ); - testconfig!( - dfa_no_prefilter, - $collection, - $kind, - |b: &mut AhoCorasickBuilder| { - b.dfa(true).prefilter(false); - } - ); - testconfig!( - dfa_all_sparse, - $collection, - $kind, - |b: &mut AhoCorasickBuilder| { - b.dfa(true).dense_depth(0); - } - ); - testconfig!( - dfa_all_dense, - $collection, - $kind, - |b: &mut AhoCorasickBuilder| { - b.dfa(true).dense_depth(usize::MAX); - } - ); - testconfig!( - dfa_no_byte_class, - $collection, - $kind, - |b: &mut AhoCorasickBuilder| { - // TODO: remove tests when option is removed. - #[allow(deprecated)] - b.dfa(true).byte_classes(false); - } - ); - testconfig!( - dfa_no_premultiply, - $collection, - $kind, - |b: &mut AhoCorasickBuilder| { - // TODO: remove tests when option is removed. - #[allow(deprecated)] - b.dfa(true).premultiply(false); - } - ); - testconfig!( - dfa_no_byte_class_no_premultiply, - $collection, - $kind, - |b: &mut AhoCorasickBuilder| { - // TODO: remove tests when options are removed. - #[allow(deprecated)] - b.dfa(true).byte_classes(false).premultiply(false); - } - ); - } - }; -} - -// Write out the combinations. 
-testcombo!(search_leftmost_longest, AC_LEFTMOST_LONGEST, LeftmostLongest); -testcombo!(search_leftmost_first, AC_LEFTMOST_FIRST, LeftmostFirst); -testcombo!( - search_standard_nonoverlapping, - AC_STANDARD_NON_OVERLAPPING, - Standard -); - -// Write out the overlapping combo by hand since there is only one of them. -testconfig!( - overlapping, - search_standard_overlapping_nfa_default, - AC_STANDARD_OVERLAPPING, - Standard, - |_| () -); -testconfig!( - overlapping, - search_standard_overlapping_nfa_all_sparse, - AC_STANDARD_OVERLAPPING, - Standard, - |b: &mut AhoCorasickBuilder| { - b.dense_depth(0); - } -); -testconfig!( - overlapping, - search_standard_overlapping_nfa_all_dense, - AC_STANDARD_OVERLAPPING, - Standard, - |b: &mut AhoCorasickBuilder| { - b.dense_depth(usize::MAX); - } -); -testconfig!( - overlapping, - search_standard_overlapping_dfa_default, - AC_STANDARD_OVERLAPPING, - Standard, - |b: &mut AhoCorasickBuilder| { - b.dfa(true); - } -); -testconfig!( - overlapping, - search_standard_overlapping_dfa_all_sparse, - AC_STANDARD_OVERLAPPING, - Standard, - |b: &mut AhoCorasickBuilder| { - b.dfa(true).dense_depth(0); - } -); -testconfig!( - overlapping, - search_standard_overlapping_dfa_all_dense, - AC_STANDARD_OVERLAPPING, - Standard, - |b: &mut AhoCorasickBuilder| { - b.dfa(true).dense_depth(usize::MAX); - } -); -testconfig!( - overlapping, - search_standard_overlapping_dfa_no_byte_class, - AC_STANDARD_OVERLAPPING, - Standard, - |b: &mut AhoCorasickBuilder| { - // TODO: remove tests when option is removed. - #[allow(deprecated)] - b.dfa(true).byte_classes(false); - } -); -testconfig!( - overlapping, - search_standard_overlapping_dfa_no_premultiply, - AC_STANDARD_OVERLAPPING, - Standard, - |b: &mut AhoCorasickBuilder| { - // TODO: remove tests when option is removed. 
- #[allow(deprecated)] - b.dfa(true).premultiply(false); - } -); -testconfig!( - overlapping, - search_standard_overlapping_dfa_no_byte_class_no_premultiply, - AC_STANDARD_OVERLAPPING, - Standard, - |b: &mut AhoCorasickBuilder| { - // TODO: remove tests when options are removed. - #[allow(deprecated)] - b.dfa(true).byte_classes(false).premultiply(false); - } -); - -// Also write out tests manually for streams, since we only test the standard -// match semantics. We also don't bother testing different automaton -// configurations, since those are well covered by tests above. -testconfig!( - stream, - search_standard_stream_nfa_default, - AC_STANDARD_NON_OVERLAPPING, - Standard, - |_| () -); -testconfig!( - stream, - search_standard_stream_dfa_default, - AC_STANDARD_NON_OVERLAPPING, - Standard, - |b: &mut AhoCorasickBuilder| { - b.dfa(true); - } -); - -// Same thing for anchored searches. Write them out manually. -testconfig!( - search_standard_anchored_nfa_default, - AC_STANDARD_ANCHORED_NON_OVERLAPPING, - Standard, - |b: &mut AhoCorasickBuilder| { - b.anchored(true); - } -); -testconfig!( - search_standard_anchored_dfa_default, - AC_STANDARD_ANCHORED_NON_OVERLAPPING, - Standard, - |b: &mut AhoCorasickBuilder| { - b.anchored(true).dfa(true); - } -); -testconfig!( - overlapping, - search_standard_anchored_overlapping_nfa_default, - AC_STANDARD_ANCHORED_OVERLAPPING, - Standard, - |b: &mut AhoCorasickBuilder| { - b.anchored(true); - } -); -testconfig!( - overlapping, - search_standard_anchored_overlapping_dfa_default, - AC_STANDARD_ANCHORED_OVERLAPPING, - Standard, - |b: &mut AhoCorasickBuilder| { - b.anchored(true).dfa(true); - } -); -testconfig!( - search_leftmost_first_anchored_nfa_default, - AC_LEFTMOST_FIRST_ANCHORED, - LeftmostFirst, - |b: &mut AhoCorasickBuilder| { - b.anchored(true); - } -); -testconfig!( - search_leftmost_first_anchored_dfa_default, - AC_LEFTMOST_FIRST_ANCHORED, - LeftmostFirst, - |b: &mut AhoCorasickBuilder| { - b.anchored(true).dfa(true); - 
} -); -testconfig!( - search_leftmost_longest_anchored_nfa_default, - AC_LEFTMOST_LONGEST_ANCHORED, - LeftmostLongest, - |b: &mut AhoCorasickBuilder| { - b.anchored(true); - } -); -testconfig!( - search_leftmost_longest_anchored_dfa_default, - AC_LEFTMOST_LONGEST_ANCHORED, - LeftmostLongest, - |b: &mut AhoCorasickBuilder| { - b.anchored(true).dfa(true); - } -); - -// And also write out the test combinations for ASCII case insensitivity. -testconfig!( - acasei_standard_nfa_default, - &[ASCII_CASE_INSENSITIVE], - Standard, - |b: &mut AhoCorasickBuilder| { - b.prefilter(false).ascii_case_insensitive(true); - } -); -testconfig!( - acasei_standard_dfa_default, - &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING], - Standard, - |b: &mut AhoCorasickBuilder| { - b.ascii_case_insensitive(true).dfa(true); - } -); -testconfig!( - overlapping, - acasei_standard_overlapping_nfa_default, - &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_OVERLAPPING], - Standard, - |b: &mut AhoCorasickBuilder| { - b.ascii_case_insensitive(true); - } -); -testconfig!( - overlapping, - acasei_standard_overlapping_dfa_default, - &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_OVERLAPPING], - Standard, - |b: &mut AhoCorasickBuilder| { - b.ascii_case_insensitive(true).dfa(true); - } -); -testconfig!( - acasei_leftmost_first_nfa_default, - &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING], - LeftmostFirst, - |b: &mut AhoCorasickBuilder| { - b.ascii_case_insensitive(true); - } -); -testconfig!( - acasei_leftmost_first_dfa_default, - &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING], - LeftmostFirst, - |b: &mut AhoCorasickBuilder| { - b.ascii_case_insensitive(true).dfa(true); - } -); -testconfig!( - acasei_leftmost_longest_nfa_default, - &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING], - LeftmostLongest, - |b: &mut AhoCorasickBuilder| { - b.ascii_case_insensitive(true); - } -); -testconfig!( - acasei_leftmost_longest_dfa_default, - 
&[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING], - LeftmostLongest, - |b: &mut AhoCorasickBuilder| { - b.ascii_case_insensitive(true).dfa(true); - } -); - -fn run_search_tests Vec>( - which: TestCollection, - mut f: F, -) { - let get_match_triples = - |matches: Vec| -> Vec<(usize, usize, usize)> { - matches - .into_iter() - .map(|m| (m.pattern(), m.start(), m.end())) - .collect() - }; - for &tests in which { - for test in tests { - assert_eq!( - test.matches, - get_match_triples(f(&test)).as_slice(), - "test: {}, patterns: {:?}, haystack: {:?}", - test.name, - test.patterns, - test.haystack - ); - } - } -} - -#[test] -fn search_tests_have_unique_names() { - let assert = |constname, tests: &[SearchTest]| { - let mut seen = HashMap::new(); // map from test name to position - for (i, test) in tests.iter().enumerate() { - if !seen.contains_key(test.name) { - seen.insert(test.name, i); - } else { - let last = seen[test.name]; - panic!( - "{} tests have duplicate names at positions {} and {}", - constname, last, i - ); - } - } - }; - assert("BASICS", BASICS); - assert("STANDARD", STANDARD); - assert("LEFTMOST", LEFTMOST); - assert("LEFTMOST_FIRST", LEFTMOST_FIRST); - assert("LEFTMOST_LONGEST", LEFTMOST_LONGEST); - assert("NON_OVERLAPPING", NON_OVERLAPPING); - assert("OVERLAPPING", OVERLAPPING); - assert("REGRESSION", REGRESSION); -} - -#[test] -#[should_panic] -fn stream_not_allowed_leftmost_first() { - let fsm = AhoCorasickBuilder::new() - .match_kind(MatchKind::LeftmostFirst) - .build(None::); - assert_eq!(fsm.stream_find_iter(&b""[..]).count(), 0); -} - -#[test] -#[should_panic] -fn stream_not_allowed_leftmost_longest() { - let fsm = AhoCorasickBuilder::new() - .match_kind(MatchKind::LeftmostLongest) - .build(None::); - assert_eq!(fsm.stream_find_iter(&b""[..]).count(), 0); -} - -#[test] -#[should_panic] -fn overlapping_not_allowed_leftmost_first() { - let fsm = AhoCorasickBuilder::new() - .match_kind(MatchKind::LeftmostFirst) - .build(None::); - 
assert_eq!(fsm.find_overlapping_iter("").count(), 0); -} - -#[test] -#[should_panic] -fn overlapping_not_allowed_leftmost_longest() { - let fsm = AhoCorasickBuilder::new() - .match_kind(MatchKind::LeftmostLongest) - .build(None::); - assert_eq!(fsm.find_overlapping_iter("").count(), 0); -} - -#[test] -fn state_id_too_small() { - let mut patterns = vec![]; - for c1 in (b'a'..b'z').map(|b| b as char) { - for c2 in (b'a'..b'z').map(|b| b as char) { - for c3 in (b'a'..b'z').map(|b| b as char) { - patterns.push(format!("{}{}{}", c1, c2, c3)); - } - } - } - let result = - AhoCorasickBuilder::new().build_with_size::(&patterns); - assert!(result.is_err()); -} - -// See: https://github.com/BurntSushi/aho-corasick/issues/44 -// -// In short, this test ensures that enabling ASCII case insensitivity does not -// visit an exponential number of states when filling in failure transitions. -#[test] -fn regression_ascii_case_insensitive_no_exponential() { - let ac = AhoCorasickBuilder::new() - .ascii_case_insensitive(true) - .build(&["Tsubaki House-Triple Shot Vol01校花三姐妹"]); - assert!(ac.find("").is_none()); -} - -// See: https://github.com/BurntSushi/aho-corasick/issues/53 -// -// This test ensures that the rare byte prefilter works in a particular corner -// case. In particular, the shift offset detected for '/' in the patterns below -// was incorrect, leading to a false negative. 
-#[test] -fn regression_rare_byte_prefilter() { - use crate::AhoCorasick; - - let ac = AhoCorasick::new_auto_configured(&["ab/j/", "x/"]); - assert!(ac.is_match("ab/j/")); -} - -#[test] -fn regression_case_insensitive_prefilter() { - use crate::AhoCorasickBuilder; - - for c in b'a'..b'z' { - for c2 in b'a'..b'z' { - let c = c as char; - let c2 = c2 as char; - let needle = format!("{}{}", c, c2).to_lowercase(); - let haystack = needle.to_uppercase(); - let ac = AhoCorasickBuilder::new() - .ascii_case_insensitive(true) - .prefilter(true) - .build(&[&needle]); - assert_eq!( - 1, - ac.find_iter(&haystack).count(), - "failed to find {:?} in {:?}\n\nautomaton:\n{:?}", - needle, - haystack, - ac, - ); - } - } -} - -// See: https://github.com/BurntSushi/aho-corasick/issues/64 -// -// This occurs when the rare byte prefilter is active. -#[test] -fn regression_stream_rare_byte_prefilter() { - use std::io::Read; - - // NOTE: The test only fails if this ends with j. - const MAGIC: [u8; 5] = *b"1234j"; - - // NOTE: The test fails for value in 8188..=8191 These value put the string - // to search accross two call to read because the buffer size is 8192 by - // default. - const BEGIN: usize = 8191; - - /// This is just a structure that implements Reader. The reader - /// implementation will simulate a file filled with 0, except for the MAGIC - /// string at offset BEGIN. 
- #[derive(Default)] - struct R { - read: usize, - } - - impl Read for R { - fn read(&mut self, buf: &mut [u8]) -> ::std::io::Result { - //dbg!(buf.len()); - if self.read > 100000 { - return Ok(0); - } - let mut from = 0; - if self.read < BEGIN { - from = buf.len().min(BEGIN - self.read); - for x in 0..from { - buf[x] = 0; - } - self.read += from; - } - if self.read >= BEGIN && self.read <= BEGIN + MAGIC.len() { - let to = buf.len().min(BEGIN + MAGIC.len() - self.read + from); - if to > from { - buf[from..to].copy_from_slice( - &MAGIC - [self.read - BEGIN..self.read - BEGIN + to - from], - ); - self.read += to - from; - from = to; - } - } - for x in from..buf.len() { - buf[x] = 0; - self.read += 1; - } - Ok(buf.len()) - } - } - - fn run() -> ::std::io::Result<()> { - let aut = AhoCorasickBuilder::new().build(&[&MAGIC]); - - // While reading from a vector, it works: - let mut buf = vec![]; - R::default().read_to_end(&mut buf)?; - let from_whole = aut.find_iter(&buf).next().unwrap().start(); - - //But using stream_find_iter fails! - let mut file = R::default(); - let begin = aut - .stream_find_iter(&mut file) - .next() - .expect("NOT FOUND!!!!")? 
// Panic here - .start(); - assert_eq!(from_whole, begin); - Ok(()) - } - - run().unwrap() -} diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/bindgen/.cargo-checksum.json clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/bindgen/.cargo-checksum.json --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/bindgen/.cargo-checksum.json 2023-02-13 06:00:43.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/bindgen/.cargo-checksum.json 2023-08-15 22:24:19.000000000 +0000 @@ -1 +1 @@ -{"files":{"Cargo.lock":"a915231b52b67320c7d440eb711c99632e4b948e5dcbeb6835e18bd0d798c76a","Cargo.toml":"655f82f7efb9e6b434a4710d8b1ea5b8c2116bccc6d8a4f87a7abc9e0c69051b","LICENSE":"c23953d9deb0a3312dbeaf6c128a657f3591acee45067612fa68405eaa4525db","README.md":"c093205492ab9f00f275c50aacfc9058264d3dcc7c7d2ff83e0cc4858d1cee49","build.rs":"d53484feea4cd147cd80280ac270c24ab727386acabb043e6347c44ac5369d0e","csmith-fuzzing/README.md":"7107b70fedb0c0a0cadb3c439a49c1bd0119a6d38dc63b1aecc74d1942256ef2","src/callbacks.rs":"cb4ca440e356dde75919a5298b75cbf145c981c2e1da62907337706286dd5c9e","src/clang.rs":"6b02ae174012372d00b442b5ec5a66a6122a091217039e5ba4917578c769d01f","src/codegen/bitfield_unit.rs":"fddeaeab5859f4e82081865595b7705f5c0774d997df95fa5c655b81b9cae125","src/codegen/bitfield_unit_tests.rs":"9df86490de5e9d66ccea583dcb686dd440375dc1a3c3cf89a89d5de3883bf28a","src/codegen/dyngen.rs":"b1bca96fbd81b1c0678122df8d28f3b60cd74047a43d0d298d69feb06eecf459","src/codegen/error.rs":"5e308b8c54b68511fc8ea2ad15ddac510172c4ff460a80a265336440b0c9653d","src/codegen/helpers.rs":"b4e2ee991e83fda62b0aebd562b948eba785179cb4aa1a154d00ffad215b7be5","src/codegen/impl_debug.rs":"71d8e28873ba2de466f2637a824746963702f0511728037d72ee5670c51194cb","src/codegen/impl_partialeq.rs":"f4599e32c66179ae515a6518a3e94b686689cf59f7dd9ab618c3fb69f17d2c77","src/codegen/mod.rs":"a286fa9a31254ce317c4baad05af446b59aaa23fb80aba9f260e67d15c64ff8c","src/codegen/struct_layout.rs":"d03e66412f4bb1fa59c623873b2a22e100d029a002c0
7aaf4586f4852a410b54","src/deps.rs":"de4a91d1d252295e1abaf4ab1f90f7be618c67649edb12081c3a501e61398a75","src/extra_assertions.rs":"494534bd4f18b80d89b180c8a93733e6617edcf7deac413e9a73fd6e7bc9ced7","src/features.rs":"f93bb757400580a75adc6a187cdeb032ec4d6efe7d3fcb9a6864472edd875580","src/ir/analysis/derive.rs":"066d35cdb7523c5edd141394286911128261b4db23cc17520e3b3111ef1bb51e","src/ir/analysis/has_destructor.rs":"7a82f01e7e0595a31b56f7c398fa3019b3fe9100a2a73b56768f7e6943dcc3ce","src/ir/analysis/has_float.rs":"58ea1e38a59ef208400fd65d426cb5b288949df2d383b3a194fa01b99d2a87fa","src/ir/analysis/has_type_param_in_array.rs":"d1b9eb119dc14f662eb9bd1394c859f485479e4912589709cdd33f6752094e22","src/ir/analysis/has_vtable.rs":"368cf30fbe3fab7190fab48718b948caac5da8c9e797b709488716b919315636","src/ir/analysis/mod.rs":"cde4ce0667d1895008c9b2af479211c828740fcb59fa13d600cbdc100fa8bdc5","src/ir/analysis/sizedness.rs":"944443d6aab35d2dd80e4f5e59176ac1e1c463ba2f0eb25d33f1d95dfac1a6d0","src/ir/analysis/template_params.rs":"a2d2e247c2f51cd90e83f11bce0305c2e498232d015f88192b44e8522e7fd8b1","src/ir/annotations.rs":"456276ef7f9b04e40b7b10aa7570d98b11aae8efe676679881459ae878bbecfc","src/ir/comment.rs":"9c0c4789c0893b636fac42228f8a0292a06cb4f2b7431895490784dd16b7f79a","src/ir/comp.rs":"811a2abfbf8ed6925327ad005a460ca698d40a2d5d4698015e1bcd4e7d2c9cf0","src/ir/context.rs":"df486590515ffaab8b51c96699a239de202569a8718d9c4b79a8ccc8808cee69","src/ir/derive.rs":"e5581852eec87918901a129284b4965aefc8a19394187a8095779a084f28fabe","src/ir/dot.rs":"2d79d698e6ac59ce032840e62ff11103abed1d5e9e700cf383b492333eeebe1f","src/ir/enum_ty.rs":"c2d928bb1a8453922c962cb11a7ab3b737c5651599141ece8d31e21e6eb74585","src/ir/function.rs":"3e13078b36ee02142017cfbbaaeb9e64ef485a12e151096e12f54a8fde984505","src/ir/int.rs":"68a86182743ec338d58e42203364dc7c8970cb7ec3550433ca92f0c9489b4442","src/ir/item.rs":"1c79d6dd400ab01545a19214847245b440690bfe129895f164bef460ee41b857","src/ir/item_kind.rs":"7666a1ff1b8260978b790a08b4139ab56b5
c65714a5652bbcec7faa7443adc36","src/ir/layout.rs":"d6bd9a14b94320f9e2517bf9fc9ffaf4220954fa24d77d90bba070dbede7392b","src/ir/mod.rs":"713cd537434567003197a123cbae679602c715e976d22f7b23dafd0826ea4c70","src/ir/module.rs":"7cae5561bcf84a5c3b1ee8f1c3336a33b7f44f0d5ffe885fb108289983fe763e","src/ir/objc.rs":"dd394c1db6546cbe5111ce5cd2f211f9839aba81c5e7228c2a68fba386bc259f","src/ir/template.rs":"3bb3e7f6ec28eff73c2032922d30b93d70da117b848e9cb02bdf6c9a74294f7f","src/ir/traversal.rs":"105d93bc2f1f55033c621667a0e55153844eec34560ae88183f799d0d0c1a6f2","src/ir/ty.rs":"2ecae57f018732b6daf1c08fc98765456a9e6a24cbceaf7f1bc004676b0113ee","src/ir/var.rs":"fe7720438af43fa3bbe3850aff331bb47131b2c21e975f92bfbcdc182789105a","src/lib.rs":"0f148aef6fd6ae814df29317fe5860d0c1747c40d5182f2518d3b81a03b6587a","src/log_stubs.rs":"9f974e041e35c8c7e29985d27ae5cd0858d68f8676d1dc005c6388d7d011707f","src/main.rs":"188cd89581490eb5f26a194cc25e4f38f3e0b93eed7ad591bc73362febd26b72","src/options.rs":"14190fae2aaad331f0660e4cc1d5a1fea0c2c88696091715867a3e7282a1d7b5","src/parse.rs":"4ffc54415eadb622ee488603862788c78361ef2c889de25259441a340c2a010f","src/regex_set.rs":"6c46357fb1ee68250e5e017cbf691f340041489ae78599eee7a5665a6ddce27f","src/time.rs":"8efe317e7c6b5ba8e0865ce7b49ca775ee8a02590f4241ef62f647fa3c22b68e"},"package":"2bd2a9a458e8f4304c52c43ebb0cfbd520289f8379a52e329a38afda99bf8eb8"} \ No newline at end of file 
+{"files":{"Cargo.toml":"1c290771bddd3cde261935e253cd7574b648d1b321a0f3466d429eca3a3cce64","LICENSE":"c23953d9deb0a3312dbeaf6c128a657f3591acee45067612fa68405eaa4525db","build.rs":"4a9c4ac3759572e17de312a9d3f4ced3b6fd3c71811729e5a8d06bfbd1ac8f82","callbacks.rs":"985f5e3b19b870ec90baa89187b5049514fc5a259bc74fd6fb2ee857c52c11ff","clang.rs":"ee5130a029688f0eadc854c9873824330b6539e2eae597e2198b51e4e8f124a5","codegen/bitfield_unit.rs":"fddeaeab5859f4e82081865595b7705f5c0774d997df95fa5c655b81b9cae125","codegen/bitfield_unit_tests.rs":"9df86490de5e9d66ccea583dcb686dd440375dc1a3c3cf89a89d5de3883bf28a","codegen/dyngen.rs":"6d8bed53c6de66bc658b3186041c2b75549f49b0f0363ff18b87c8dcf2f5a05b","codegen/error.rs":"fa02274debd9064f35a627c43407e4e47ca89f2becfb1c233a500440d6c73e00","codegen/helpers.rs":"cf9e60d18d17d624f3559b6dd65e75630a16e6c1b71666f7c9656e51053d10f8","codegen/impl_debug.rs":"80df6136327b1ca8c7d1c2961290b5ab00b85b49b22c02f26a590bc68fb230af","codegen/impl_partialeq.rs":"db739d7ba6f5ba4033d6bf62c276f35217c20eab27230cf07dadf59e8b2f71bb","codegen/mod.rs":"89156a1926556d7c46b0266aabbb7c4e5a4a93fe1e5fc088f86acd3b14203f17","codegen/postprocessing/merge_extern_blocks.rs":"284457a3c75e945217bab4e5a4280fef0fcc03c31e12cc5010aab87f34c0b6c7","codegen/postprocessing/mod.rs":"160a6d6701cabf2514e23570df1bd1b648c909cc27b7c583f21d98fe0c16722e","codegen/postprocessing/sort_semantically.rs":"f465d1e8cc119082eb79c164b5cd780a370821e8bf56585b287dd3b51fc4a542","codegen/serialize.rs":"bb99633ab6a6764b84dac86a873fa64c90aa4979f26e75fbeff9af365b3fefa8","codegen/struct_layout.rs":"5685fc6caa24ac2779fbb885064043898830c00c92819e8c0e4fd9564c641c4d","deps.rs":"5ee2332fdb10325f3b0a0c6d9ba94e13eb631ef39e955fa958afc3625bdb5448","diagnostics.rs":"dc40cd5e9710922422c5c9420e2351f5d976e7a1d7275e4f4ce742cad9eb53f8","extra_assertions.rs":"494534bd4f18b80d89b180c8a93733e6617edcf7deac413e9a73fd6e7bc9ced7","features.rs":"6c17e37bdd14355c9c3f93b67e539bf001ea113a9efe287527e9021d785b5bda","ir/analysis/derive.rs":"cb
a290e9c4ba271e90524149ad3b874f37843bfdfab12d513cc85d2665447fd5","ir/analysis/has_destructor.rs":"e7e95c3b0989b6375cd3eabaac85a36ecc2915a1fd3700c7d26fe04e8dc83ba3","ir/analysis/has_float.rs":"a56b97bf913f132c2c63dc202b45c692c416a8c9fdc6b2baeee30362fb0d4405","ir/analysis/has_type_param_in_array.rs":"788ebb4ba2cf46a22f1e4ff3005d51f38d414b72e95355f7ff4125521e2d9525","ir/analysis/has_vtable.rs":"83efa40ae89147170eabdff1387e60aba574ca4cd4cdef22692753594f09d6c6","ir/analysis/mod.rs":"ed161d9f60306ad42af2ae70ff0eb686a36e2fb30eb94918b5e5f19af80e1db7","ir/analysis/sizedness.rs":"f0a9302f3c6ad694d76cfab11dbaf5392ecaf7f04bc7b211a5a003776b963896","ir/analysis/template_params.rs":"8f73a640cdd3b8e4e05fd5818eec8d36ba240ff131e8b785da3270c1335827a1","ir/annotations.rs":"eaacb6508b02d7d494bcaa50b9ba7acbe15f90f22130d3a24e2573909c08776f","ir/comment.rs":"4c9c20b5a3da086211e92adec0822831dbc0b7ebee98fee313edcae9ae8d55ec","ir/comp.rs":"fb32715ed8fc14bee51c344a41c1f7a8a802d4a6dceb2775034ea33a88670df7","ir/context.rs":"8b9f502e85ed563b46fc11eacb2e2140c19e7527dce4e31831cc9a571fbf87ff","ir/derive.rs":"c21e470bb0091f20bfa366110880d48984fc3cf7071fdf36eccfa64f3eca231c","ir/dot.rs":"75bdfd83d9e754ba726f6a5529ba1d9ff46f5bf49bf237452985eb008fed0854","ir/enum_ty.rs":"f4bfa6d18ba4977fb66f5d5e4a7674eded93b761404d91cdd6fdd50029db455a","ir/function.rs":"4cb04fbf40e8e8d4128c6182c84f21026b99446daf29ccba0871bedb275a5f81","ir/int.rs":"601736f0ad0949e40684a9ce89bafbfefa71743df6ee6c342e44888a0f141ae0","ir/item.rs":"5c0d0d2a7a327ac0c6ba1aadcef710b6d399c24bee3fbbd1ab6386e871c44033","ir/item_kind.rs":"33e21104b0bb824a696a52cd520567ae56158010a1df14777e68ac5f8ad7e8fa","ir/layout.rs":"e704c9c8cd1532f9890a1c6b43e3b6e691565b6acc2a9ce07486a4674497f511","ir/mod.rs":"a3b98b1732111a980a795c72eaf1e09101e842ef2de76b4f2d4a7857f8d4cee4","ir/module.rs":"f82f380274e9adbab8017bc5e484a23d945e2cb7a97ce17c9cd2a2cfc505bb54","ir/objc.rs":"0f55ff60db706241634ed8396108ec84ecbec80e0cf28f68ab580c868e0e0cb4","ir/template.rs":"3f59efa9670c
a90215d4374be869c9dbecb98a8d1041e7c6e4ab69a62bb982c2","ir/traversal.rs":"a4ec73d3533d4b93386153baf6a2ca846ee51228c76ed51105229d3ddcd74466","ir/ty.rs":"7e479d601229619cf39073fc3570f4211666cc042a60ab27c810bdde0e5d5690","ir/var.rs":"40d18226706de0ee5f002d0b5617dbcba35de0605edd531c75e3a76d000f0f4f","lib.rs":"ef2927a0a84d50b6bea44d9e95f69d2dc9fc7bc75aff8fc3a5edd2919613a81c","log_stubs.rs":"9f974e041e35c8c7e29985d27ae5cd0858d68f8676d1dc005c6388d7d011707f","options/as_args.rs":"3b3547e08f0cb72fa042cde417bbc8760166d11dc0db4812e7a280c93074d2f5","options/helpers.rs":"f4a7681e29b2dcc3be9249478c499d685b9e29d4f4ca4ae8bff7a91668cd8f15","options/mod.rs":"f06194a21bf5b4a7039d1be80e5b0b3e4a310f48084a6e2b7abbb1539d0c2004","parse.rs":"fce3616e0464aa7414888e5d00d4df18c83bb3034a1c807d36a07a3c586e475a","regex_set.rs":"8b38dce6b4b34712f7eafcb2817024de18fccf0cead0c175de34f78ea4027545","time.rs":"8efe317e7c6b5ba8e0865ce7b49ca775ee8a02590f4241ef62f647fa3c22b68e"},"package":"cfdf7b466f9a4903edc73f95d6d2bcd5baf8ae620638762244d3f60143643cc5"} \ No newline at end of file diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/bindgen/Cargo.lock clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/bindgen/Cargo.lock --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/bindgen/Cargo.lock 2023-02-13 06:00:43.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/bindgen/Cargo.lock 1970-01-01 00:00:00.000000000 +0000 @@ -1,446 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. 
-version = 3 - -[[package]] -name = "aho-corasick" -version = "0.7.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5" -dependencies = [ - "memchr", -] - -[[package]] -name = "ansi_term" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" -dependencies = [ - "winapi", -] - -[[package]] -name = "atty" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" -dependencies = [ - "hermit-abi", - "libc", - "winapi", -] - -[[package]] -name = "bindgen" -version = "0.59.2" -dependencies = [ - "bitflags", - "cexpr", - "clang-sys", - "clap", - "diff", - "env_logger", - "lazy_static", - "lazycell", - "log", - "peeking_take_while", - "proc-macro2", - "quote", - "regex", - "rustc-hash", - "shlex", - "tempfile", - "which", -] - -[[package]] -name = "bitflags" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" - -[[package]] -name = "cexpr" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" -dependencies = [ - "nom", -] - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "clang-sys" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "853eda514c284c2287f4bf20ae614f8781f40a81d32ecda6e91449304dfe077c" -dependencies = [ - "glob", - "libc", - "libloading", -] - -[[package]] -name = "clap" -version = "2.33.3" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002" -dependencies = [ - "ansi_term", - "atty", - "bitflags", - "strsim", - "textwrap", - "unicode-width", - "vec_map", -] - -[[package]] -name = "diff" -version = "0.1.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e25ea47919b1560c4e3b7fe0aaab9becf5b84a10325ddf7db0f0ba5e1026499" - -[[package]] -name = "either" -version = "1.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" - -[[package]] -name = "env_logger" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b2cf0344971ee6c64c31be0d530793fba457d322dfec2810c453d0ef228f9c3" -dependencies = [ - "atty", - "humantime", - "log", - "regex", - "termcolor", -] - -[[package]] -name = "getrandom" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753" -dependencies = [ - "cfg-if", - "libc", - "wasi", -] - -[[package]] -name = "glob" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" - -[[package]] -name = "hermit-abi" -version = "0.1.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" -dependencies = [ - "libc", -] - -[[package]] -name = "humantime" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" - -[[package]] -name = "lazy_static" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" - -[[package]] -name = "lazycell" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" - -[[package]] -name = "libc" -version = "0.2.98" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "320cfe77175da3a483efed4bc0adc1968ca050b098ce4f2f1c13a56626128790" - -[[package]] -name = "libloading" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f84d96438c15fcd6c3f244c8fce01d1e2b9c6b5623e9c711dc9286d8fc92d6a" -dependencies = [ - "cfg-if", - "winapi", -] - -[[package]] -name = "log" -version = "0.4.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "memchr" -version = "2.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525" - -[[package]] -name = "minimal-lexical" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c64630dcdd71f1a64c435f54885086a0de5d6a12d104d69b165fb7d5286d677" - -[[package]] -name = "nom" -version = "7.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ffd9d26838a953b4af82cbeb9f1592c6798916983959be223a7124e992742c1" -dependencies = [ - "memchr", - "minimal-lexical", - "version_check", -] - -[[package]] -name = "peeking_take_while" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" - -[[package]] -name = "ppv-lite86" -version = "0.2.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857" - -[[package]] -name = "proc-macro2" -version = "1.0.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c7ed8b8c7b886ea3ed7dde405212185f423ab44682667c8c6dd14aa1d9f6612" -dependencies = [ - "unicode-xid", -] - -[[package]] -name = "quote" -version = "1.0.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "rand" -version = "0.8.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e7573632e6454cf6b99d7aac4ccca54be06da05aca2ef7423d22d27d4d4bcd8" -dependencies = [ - "libc", - "rand_chacha", - "rand_core", - "rand_hc", -] - -[[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", - "rand_core", -] - -[[package]] -name = "rand_core" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" -dependencies = [ - "getrandom", -] - -[[package]] -name = "rand_hc" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d51e9f596de227fda2ea6c84607f5558e196eeaf43c986b724ba4fb8fdf497e7" -dependencies = [ - "rand_core", -] - -[[package]] -name = "redox_syscall" -version = "0.2.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ab49abadf3f9e1c4bc499e8845e152ad87d2ad2d30371841171169e9d75feee" -dependencies = [ - "bitflags", -] - -[[package]] -name = "regex" -version = "1.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a26af418b574bd56588335b3a3659a65725d4e636eb1016c2f9e3b38c7cc759" -dependencies = [ - "aho-corasick", 
- "memchr", - "regex-syntax", -] - -[[package]] -name = "regex-syntax" -version = "0.6.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" - -[[package]] -name = "remove_dir_all" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" -dependencies = [ - "winapi", -] - -[[package]] -name = "rustc-hash" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" - -[[package]] -name = "shlex" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42a568c8f2cd051a4d283bd6eb0343ac214c1b0f1ac19f93e1175b2dee38c73d" - -[[package]] -name = "strsim" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" - -[[package]] -name = "tempfile" -version = "3.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dac1c663cfc93810f88aed9b8941d48cabf856a1b111c29a40439018d870eb22" -dependencies = [ - "cfg-if", - "libc", - "rand", - "redox_syscall", - "remove_dir_all", - "winapi", -] - -[[package]] -name = "termcolor" -version = "1.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dfed899f0eb03f32ee8c6a0aabdb8a7949659e3466561fc0adf54e26d88c5f4" -dependencies = [ - "winapi-util", -] - -[[package]] -name = "textwrap" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" -dependencies = [ - "unicode-width", -] - -[[package]] -name = "unicode-width" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3" - -[[package]] -name = "unicode-xid" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" - -[[package]] -name = "vec_map" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" - -[[package]] -name = "version_check" -version = "0.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe" - -[[package]] -name = "wasi" -version = "0.10.2+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" - -[[package]] -name = "which" -version = "4.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cc009ab82a2afc94b9e467ab4214aee9cad1356cd9191264203d7d72006e00d" -dependencies = [ - "either", - "lazy_static", - "libc", -] - -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-util" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" -dependencies = [ - "winapi", -] - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/bindgen/Cargo.toml clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/bindgen/Cargo.toml --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/bindgen/Cargo.toml 2023-02-13 06:00:43.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/bindgen/Cargo.toml 2023-08-15 22:24:19.000000000 +0000 @@ -11,28 +11,41 @@ [package] edition = "2018" +rust-version = "1.60.0" name = "bindgen" -version = "0.59.2" -authors = ["Jyun-Yan You ", "Emilio Cobos Álvarez ", "Nick Fitzgerald ", "The Servo project developers"] +version = "0.65.1" +authors = [ + "Jyun-Yan You ", + "Emilio Cobos Álvarez ", + "Nick Fitzgerald ", + "The Servo project developers", +] build = "build.rs" -include = ["LICENSE", "README.md", "Cargo.toml", "build.rs", "src/*.rs", "src/**/*.rs"] description = "Automatically generates Rust FFI bindings to C and C++ libraries." 
homepage = "https://rust-lang.github.io/rust-bindgen/" documentation = "https://docs.rs/bindgen" -readme = "README.md" -keywords = ["bindings", "ffi", "code-generation"] -categories = ["external-ffi-bindings", "development-tools::ffi"] +readme = "../README.md" +keywords = [ + "bindings", + "ffi", + "code-generation", +] +categories = [ + "external-ffi-bindings", + "development-tools::ffi", +] license = "BSD-3-Clause" repository = "https://github.com/rust-lang/rust-bindgen" [lib] -path = "src/lib.rs" - -[[bin]] name = "bindgen" -path = "src/main.rs" -doc = false -required-features = ["clap"] +path = "lib.rs" + +[dependencies.annotate-snippets] +version = "0.9.1" +features = ["color"] +optional = true + [dependencies.bitflags] version = "1.0.3" @@ -43,14 +56,6 @@ version = "1" features = ["clang_6_0"] -[dependencies.clap] -version = "2" -optional = true - -[dependencies.env_logger] -version = "0.9.0" -optional = true - [dependencies.lazy_static] version = "1" @@ -64,6 +69,9 @@ [dependencies.peeking_take_while] version = "0.1.2" +[dependencies.prettyplease] +version = "0.2.0" + [dependencies.proc-macro2] version = "1" default-features = false @@ -73,8 +81,11 @@ default-features = false [dependencies.regex] -version = "1.0" -features = ["std", "unicode"] +version = "1.5" +features = [ + "std", + "unicode", +] default-features = false [dependencies.rustc-hash] @@ -83,33 +94,31 @@ [dependencies.shlex] version = "1" +[dependencies.syn] +version = "2.0" +features = [ + "full", + "extra-traits", + "visit-mut", +] + [dependencies.which] version = "4.2.1" optional = true default-features = false -[dev-dependencies.clap] -version = "2" - -[dev-dependencies.diff] -version = "0.1" - -[dev-dependencies.shlex] -version = "1" - -[dev-dependencies.tempfile] -version = "3" [features] -default = ["logging", "clap", "runtime", "which-rustfmt"] -logging = ["env_logger", "log"] +__cli = [] +default = [ + "logging", + "runtime", + "which-rustfmt", +] +experimental = ["annotate-snippets"] 
+logging = ["log"] runtime = ["clang-sys/runtime"] static = ["clang-sys/static"] -testing_only_docs = [] testing_only_extra_assertions = [] -testing_only_libclang_3_9 = [] -testing_only_libclang_4 = [] testing_only_libclang_5 = [] testing_only_libclang_9 = [] which-rustfmt = ["which"] -[badges.travis-ci] -repository = "rust-lang/rust-bindgen" diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/bindgen/README.md clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/bindgen/README.md --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/bindgen/README.md 2023-02-13 06:00:43.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/bindgen/README.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,83 +0,0 @@ -[![crates.io](https://img.shields.io/crates/v/bindgen.svg)](https://crates.io/crates/bindgen) -[![docs.rs](https://docs.rs/bindgen/badge.svg)](https://docs.rs/bindgen/) - -# `bindgen` - -**`bindgen` automatically generates Rust FFI bindings to C (and some C++) libraries.** - -For example, given the C header `doggo.h`: - -```c -typedef struct Doggo { - int many; - char wow; -} Doggo; - -void eleven_out_of_ten_majestic_af(Doggo* pupper); -``` - -`bindgen` produces Rust FFI code allowing you to call into the `doggo` library's -functions and use its types: - -```rust -/* automatically generated by rust-bindgen 0.99.9 */ - -#[repr(C)] -pub struct Doggo { - pub many: ::std::os::raw::c_int, - pub wow: ::std::os::raw::c_char, -} - -extern "C" { - pub fn eleven_out_of_ten_majestic_af(pupper: *mut Doggo); -} -``` - -## Users Guide - -[📚 Read the `bindgen` users guide here! 📚](https://rust-lang.github.io/rust-bindgen) - -## MSRV - -The minimum supported Rust version is **1.46**. - -No MSRV bump policy has been established yet, so MSRV may increase in any release. 
- -## API Reference - -[API reference documentation is on docs.rs](https://docs.rs/bindgen) - -## Environment Variables - -In addition to the [library API](https://docs.rs/bindgen) and [executable command-line API][bindgen-cmdline], -`bindgen` can be controlled through environment variables. - -End-users should set these environment variables to modify `bindgen`'s behavior without modifying the source code of direct consumers of `bindgen`. - -- `BINDGEN_EXTRA_CLANG_ARGS`: extra arguments to pass to `clang` - - Arguments are whitespace-separated - - Use shell-style quoting to pass through whitespace - - Examples: - - Specify alternate sysroot: `--sysroot=/path/to/sysroot` - - Add include search path with spaces: `-I"/path/with spaces"` -- `BINDGEN_EXTRA_CLANG_ARGS_`: similar to `BINDGEN_EXTRA_CLANG_ARGS`, - but used to set per-target arguments to pass to clang. Useful to set system include - directories in a target-specific way in cross-compilation environments with multiple targets. - Has precedence over `BINDGEN_EXTRA_CLANG_ARGS`. - -Additionally, `bindgen` uses `libclang` to parse C and C++ header files. -To modify how `bindgen` searches for `libclang`, see the [`clang-sys` documentation][clang-sys-env]. -For more details on how `bindgen` uses `libclang`, see the [`bindgen` users guide][bindgen-book-clang]. - -## Releases - -We don't follow a specific release calendar, but if you need a release please -file an issue requesting that (ping `@emilio` for increased effectiveness). 
- -## Contributing - -[See `CONTRIBUTING.md` for hacking on `bindgen`!](./CONTRIBUTING.md) - -[bindgen-cmdline]: https://rust-lang.github.io/rust-bindgen/command-line-usage.html -[clang-sys-env]: https://github.com/KyleMayes/clang-sys#environment-variables -[bindgen-book-clang]: https://rust-lang.github.io/rust-bindgen/requirements.html#clang diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/bindgen/build.rs clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/bindgen/build.rs --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/bindgen/build.rs 2023-02-13 06:00:43.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/bindgen/build.rs 2023-08-15 22:24:19.000000000 +0000 @@ -1,76 +1,15 @@ -mod target { - use std::env; - use std::fs::File; - use std::io::Write; - use std::path::{Path, PathBuf}; - - pub fn main() { - let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap()); - - let mut dst = - File::create(Path::new(&out_dir).join("host-target.txt")).unwrap(); - dst.write_all(env::var("TARGET").unwrap().as_bytes()) - .unwrap(); - } -} - -mod testgen { - use std::char; - use std::env; - use std::ffi::OsStr; - use std::fs::{self, File}; - use std::io::Write; - use std::path::{Path, PathBuf}; - - pub fn main() { - let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap()); - let mut dst = - File::create(Path::new(&out_dir).join("tests.rs")).unwrap(); - - let manifest_dir = - PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); - let headers_dir = manifest_dir.join("tests").join("headers"); - - let headers = match fs::read_dir(headers_dir) { - Ok(dir) => dir, - // We may not have headers directory after packaging. - Err(..) 
=> return, - }; - - let entries = - headers.map(|result| result.expect("Couldn't read header file")); - - println!("cargo:rerun-if-changed=tests/headers"); - - for entry in entries { - match entry.path().extension().and_then(OsStr::to_str) { - Some("h") | Some("hpp") => { - let func = entry - .file_name() - .to_str() - .unwrap() - .replace(|c| !char::is_alphanumeric(c), "_") - .replace("__", "_") - .to_lowercase(); - writeln!( - dst, - "test_header!(header_{}, {:?});", - func, - entry.path(), - ) - .unwrap(); - } - _ => {} - } - } - - dst.flush().unwrap(); - } -} +use std::env; +use std::fs::File; +use std::io::Write; +use std::path::{Path, PathBuf}; fn main() { - target::main(); - testgen::main(); + let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap()); + + let mut dst = + File::create(Path::new(&out_dir).join("host-target.txt")).unwrap(); + dst.write_all(env::var("TARGET").unwrap().as_bytes()) + .unwrap(); // On behalf of clang_sys, rebuild ourselves if important configuration // variables change, to ensure that bindings get rebuilt if the @@ -85,6 +24,6 @@ ); println!( "cargo:rerun-if-env-changed=BINDGEN_EXTRA_CLANG_ARGS_{}", - std::env::var("TARGET").unwrap().replace("-", "_") + std::env::var("TARGET").unwrap().replace('-', "_") ); } diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/bindgen/callbacks.rs clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/bindgen/callbacks.rs --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/bindgen/callbacks.rs 1970-01-01 00:00:00.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/bindgen/callbacks.rs 2023-08-15 22:24:19.000000000 +0000 @@ -0,0 +1,178 @@ +//! A public API for more fine-grained customization of bindgen behavior. + +pub use crate::ir::analysis::DeriveTrait; +pub use crate::ir::derive::CanDerive as ImplementsTrait; +pub use crate::ir::enum_ty::{EnumVariantCustomBehavior, EnumVariantValue}; +pub use crate::ir::int::IntKind; +use std::fmt; + +/// An enum to allow ignoring parsing of macros. 
+#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum MacroParsingBehavior { + /// Ignore the macro, generating no code for it, or anything that depends on + /// it. + Ignore, + /// The default behavior bindgen would have otherwise. + Default, +} + +impl Default for MacroParsingBehavior { + fn default() -> Self { + MacroParsingBehavior::Default + } +} + +/// A trait to allow configuring different kinds of types in different +/// situations. +pub trait ParseCallbacks: fmt::Debug { + #[cfg(feature = "__cli")] + #[doc(hidden)] + fn cli_args(&self) -> Vec { + vec![] + } + + /// This function will be run on every macro that is identified. + fn will_parse_macro(&self, _name: &str) -> MacroParsingBehavior { + MacroParsingBehavior::Default + } + + /// This function will run for every extern variable and function. The returned value determines + /// the name visible in the bindings. + fn generated_name_override( + &self, + _item_info: ItemInfo<'_>, + ) -> Option { + None + } + + /// This function will run for every extern variable and function. The returned value determines + /// the link name in the bindings. + fn generated_link_name_override( + &self, + _item_info: ItemInfo<'_>, + ) -> Option { + None + } + + /// The integer kind an integer macro should have, given a name and the + /// value of that macro, or `None` if you want the default to be chosen. + fn int_macro(&self, _name: &str, _value: i64) -> Option { + None + } + + /// This will be run on every string macro. The callback cannot influence the further + /// treatment of the macro, but may use the value to generate additional code or configuration. + fn str_macro(&self, _name: &str, _value: &[u8]) {} + + /// This will be run on every function-like macro. The callback cannot + /// influence the further treatment of the macro, but may use the value to + /// generate additional code or configuration. 
+ /// + /// The first parameter represents the name and argument list (including the + /// parentheses) of the function-like macro. The second parameter represents + /// the expansion of the macro as a sequence of tokens. + fn func_macro(&self, _name: &str, _value: &[&[u8]]) {} + + /// This function should return whether, given an enum variant + /// name, and value, this enum variant will forcibly be a constant. + fn enum_variant_behavior( + &self, + _enum_name: Option<&str>, + _original_variant_name: &str, + _variant_value: EnumVariantValue, + ) -> Option { + None + } + + /// Allows to rename an enum variant, replacing `_original_variant_name`. + fn enum_variant_name( + &self, + _enum_name: Option<&str>, + _original_variant_name: &str, + _variant_value: EnumVariantValue, + ) -> Option { + None + } + + /// Allows to rename an item, replacing `_original_item_name`. + fn item_name(&self, _original_item_name: &str) -> Option { + None + } + + /// This will be called on every file inclusion, with the full path of the included file. + fn include_file(&self, _filename: &str) {} + + /// This will be called every time `bindgen` reads an environment variable whether it has any + /// content or not. + fn read_env_var(&self, _key: &str) {} + + /// This will be called to determine whether a particular blocklisted type + /// implements a trait or not. This will be used to implement traits on + /// other types containing the blocklisted type. + /// + /// * `None`: use the default behavior + /// * `Some(ImplementsTrait::Yes)`: `_name` implements `_derive_trait` + /// * `Some(ImplementsTrait::Manually)`: any type including `_name` can't + /// derive `_derive_trait` but can implemented it manually + /// * `Some(ImplementsTrait::No)`: `_name` doesn't implement `_derive_trait` + fn blocklisted_type_implements_trait( + &self, + _name: &str, + _derive_trait: DeriveTrait, + ) -> Option { + None + } + + /// Provide a list of custom derive attributes. 
+ /// + /// If no additional attributes are wanted, this function should return an + /// empty `Vec`. + fn add_derives(&self, _info: &DeriveInfo<'_>) -> Vec { + vec![] + } + + /// Process a source code comment. + fn process_comment(&self, _comment: &str) -> Option { + None + } +} + +/// Relevant information about a type to which new derive attributes will be added using +/// [`ParseCallbacks::add_derives`]. +#[derive(Debug)] +#[non_exhaustive] +pub struct DeriveInfo<'a> { + /// The name of the type. + pub name: &'a str, + /// The kind of the type. + pub kind: TypeKind, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +/// The kind of the current type. +pub enum TypeKind { + /// The type is a Rust `struct`. + Struct, + /// The type is a Rust `enum`. + Enum, + /// The type is a Rust `union`. + Union, +} + +/// A struct providing information about the item being passed to [`ParseCallbacks::generated_name_override`]. +#[non_exhaustive] +pub struct ItemInfo<'a> { + /// The name of the item + pub name: &'a str, + /// The kind of item + pub kind: ItemKind, +} + +/// An enum indicating the kind of item for an ItemInfo. +#[non_exhaustive] +pub enum ItemKind { + /// A Function + Function, + /// A Variable + Var, +} diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/bindgen/clang.rs clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/bindgen/clang.rs --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/bindgen/clang.rs 1970-01-01 00:00:00.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/bindgen/clang.rs 2023-08-15 22:24:19.000000000 +0000 @@ -0,0 +1,2236 @@ +//! A higher level Clang API built on top of the generated bindings in the +//! `clang_sys` module. 
+ +#![allow(non_upper_case_globals, dead_code)] +#![deny(clippy::missing_docs_in_private_items)] + +use crate::ir::context::BindgenContext; +use clang_sys::*; +use std::ffi::{CStr, CString}; +use std::fmt; +use std::hash::Hash; +use std::hash::Hasher; +use std::os::raw::{c_char, c_int, c_longlong, c_uint, c_ulong, c_ulonglong}; +use std::{mem, ptr, slice}; + +/// Type representing a clang attribute. +/// +/// Values of this type can be used to check for different attributes using the `has_attrs` +/// function. +pub(crate) struct Attribute { + name: &'static [u8], + kind: Option, + token_kind: CXTokenKind, +} + +impl Attribute { + /// A `warn_unused_result` attribute. + pub(crate) const MUST_USE: Self = Self { + name: b"warn_unused_result", + // FIXME(emilio): clang-sys doesn't expose `CXCursor_WarnUnusedResultAttr` (from clang 9). + kind: Some(440), + token_kind: CXToken_Identifier, + }; + + /// A `_Noreturn` attribute. + pub(crate) const NO_RETURN: Self = Self { + name: b"_Noreturn", + kind: None, + token_kind: CXToken_Keyword, + }; + + /// A `[[noreturn]]` attribute. + pub(crate) const NO_RETURN_CPP: Self = Self { + name: b"noreturn", + kind: None, + token_kind: CXToken_Identifier, + }; +} + +/// A cursor into the Clang AST, pointing to an AST node. +/// +/// We call the AST node pointed to by the cursor the cursor's "referent". +#[derive(Copy, Clone)] +pub(crate) struct Cursor { + x: CXCursor, +} + +impl fmt::Debug for Cursor { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!( + fmt, + "Cursor({} kind: {}, loc: {}, usr: {:?})", + self.spelling(), + kind_to_str(self.kind()), + self.location(), + self.usr() + ) + } +} + +impl Cursor { + /// Get the Unified Symbol Resolution for this cursor's referent, if + /// available. + /// + /// The USR can be used to compare entities across translation units. 
+ pub(crate) fn usr(&self) -> Option { + let s = unsafe { cxstring_into_string(clang_getCursorUSR(self.x)) }; + if s.is_empty() { + None + } else { + Some(s) + } + } + + /// Is this cursor's referent a declaration? + pub(crate) fn is_declaration(&self) -> bool { + unsafe { clang_isDeclaration(self.kind()) != 0 } + } + + /// Is this cursor's referent an anonymous record or so? + pub(crate) fn is_anonymous(&self) -> bool { + unsafe { clang_Cursor_isAnonymous(self.x) != 0 } + } + + /// Get this cursor's referent's spelling. + pub(crate) fn spelling(&self) -> String { + unsafe { cxstring_into_string(clang_getCursorSpelling(self.x)) } + } + + /// Get this cursor's referent's display name. + /// + /// This is not necessarily a valid identifier. It includes extra + /// information, such as parameters for a function, etc. + pub(crate) fn display_name(&self) -> String { + unsafe { cxstring_into_string(clang_getCursorDisplayName(self.x)) } + } + + /// Get the mangled name of this cursor's referent. + pub(crate) fn mangling(&self) -> String { + unsafe { cxstring_into_string(clang_Cursor_getMangling(self.x)) } + } + + /// Gets the C++ manglings for this cursor, or an error if the manglings + /// are not available. + pub(crate) fn cxx_manglings(&self) -> Result, ()> { + use clang_sys::*; + unsafe { + let manglings = clang_Cursor_getCXXManglings(self.x); + if manglings.is_null() { + return Err(()); + } + let count = (*manglings).Count as usize; + + let mut result = Vec::with_capacity(count); + for i in 0..count { + let string_ptr = (*manglings).Strings.add(i); + result.push(cxstring_to_string_leaky(*string_ptr)); + } + clang_disposeStringSet(manglings); + Ok(result) + } + } + + /// Returns whether the cursor refers to a built-in definition. + pub(crate) fn is_builtin(&self) -> bool { + let (file, _, _, _) = self.location().location(); + file.name().is_none() + } + + /// Get the `Cursor` for this cursor's referent's lexical parent. 
+ /// + /// The lexical parent is the parent of the definition. The semantic parent + /// is the parent of the declaration. Generally, the lexical parent doesn't + /// have any effect on semantics, while the semantic parent does. + /// + /// In the following snippet, the `Foo` class would be the semantic parent + /// of the out-of-line `method` definition, while the lexical parent is the + /// translation unit. + /// + /// ```c++ + /// class Foo { + /// void method(); + /// }; + /// + /// void Foo::method() { /* ... */ } + /// ``` + pub(crate) fn lexical_parent(&self) -> Cursor { + unsafe { + Cursor { + x: clang_getCursorLexicalParent(self.x), + } + } + } + + /// Get the referent's semantic parent, if one is available. + /// + /// See documentation for `lexical_parent` for details on semantic vs + /// lexical parents. + pub(crate) fn fallible_semantic_parent(&self) -> Option { + let sp = unsafe { + Cursor { + x: clang_getCursorSemanticParent(self.x), + } + }; + if sp == *self || !sp.is_valid() { + return None; + } + Some(sp) + } + + /// Get the referent's semantic parent. + /// + /// See documentation for `lexical_parent` for details on semantic vs + /// lexical parents. + pub(crate) fn semantic_parent(&self) -> Cursor { + self.fallible_semantic_parent().unwrap() + } + + /// Return the number of template arguments used by this cursor's referent, + /// if the referent is either a template instantiation. Returns `None` + /// otherwise. + /// + /// NOTE: This may not return `Some` for partial template specializations, + /// see #193 and #194. + pub(crate) fn num_template_args(&self) -> Option { + // XXX: `clang_Type_getNumTemplateArguments` is sort of reliable, while + // `clang_Cursor_getNumTemplateArguments` is totally unreliable. + // Therefore, try former first, and only fallback to the latter if we + // have to. 
+ self.cur_type() + .num_template_args() + .or_else(|| { + let n: c_int = + unsafe { clang_Cursor_getNumTemplateArguments(self.x) }; + + if n >= 0 { + Some(n as u32) + } else { + debug_assert_eq!(n, -1); + None + } + }) + .or_else(|| { + let canonical = self.canonical(); + if canonical != *self { + canonical.num_template_args() + } else { + None + } + }) + } + + /// Get a cursor pointing to this referent's containing translation unit. + /// + /// Note that we shouldn't create a `TranslationUnit` struct here, because + /// bindgen assumes there will only be one of them alive at a time, and + /// disposes it on drop. That can change if this would be required, but I + /// think we can survive fine without it. + pub(crate) fn translation_unit(&self) -> Cursor { + assert!(self.is_valid()); + unsafe { + let tu = clang_Cursor_getTranslationUnit(self.x); + let cursor = Cursor { + x: clang_getTranslationUnitCursor(tu), + }; + assert!(cursor.is_valid()); + cursor + } + } + + /// Is the referent a top level construct? + pub(crate) fn is_toplevel(&self) -> bool { + let mut semantic_parent = self.fallible_semantic_parent(); + + while semantic_parent.is_some() && + (semantic_parent.unwrap().kind() == CXCursor_Namespace || + semantic_parent.unwrap().kind() == + CXCursor_NamespaceAlias || + semantic_parent.unwrap().kind() == CXCursor_NamespaceRef) + { + semantic_parent = + semantic_parent.unwrap().fallible_semantic_parent(); + } + + let tu = self.translation_unit(); + // Yes, this can happen with, e.g., macro definitions. + semantic_parent == tu.fallible_semantic_parent() + } + + /// There are a few kinds of types that we need to treat specially, mainly + /// not tracking the type declaration but the location of the cursor, given + /// clang doesn't expose a proper declaration for these types. 
+ pub(crate) fn is_template_like(&self) -> bool { + matches!( + self.kind(), + CXCursor_ClassTemplate | + CXCursor_ClassTemplatePartialSpecialization | + CXCursor_TypeAliasTemplateDecl + ) + } + + /// Is this Cursor pointing to a function-like macro definition? + pub(crate) fn is_macro_function_like(&self) -> bool { + unsafe { clang_Cursor_isMacroFunctionLike(self.x) != 0 } + } + + /// Get the kind of referent this cursor is pointing to. + pub(crate) fn kind(&self) -> CXCursorKind { + self.x.kind + } + + /// Returns true if the cursor is a definition + pub(crate) fn is_definition(&self) -> bool { + unsafe { clang_isCursorDefinition(self.x) != 0 } + } + + /// Is the referent a template specialization? + pub(crate) fn is_template_specialization(&self) -> bool { + self.specialized().is_some() + } + + /// Is the referent a fully specialized template specialization without any + /// remaining free template arguments? + pub(crate) fn is_fully_specialized_template(&self) -> bool { + self.is_template_specialization() && + self.kind() != CXCursor_ClassTemplatePartialSpecialization && + self.num_template_args().unwrap_or(0) > 0 + } + + /// Is the referent a template specialization that still has remaining free + /// template arguments? + pub(crate) fn is_in_non_fully_specialized_template(&self) -> bool { + if self.is_toplevel() { + return false; + } + + let parent = self.semantic_parent(); + if parent.is_fully_specialized_template() { + return false; + } + + if !parent.is_template_like() { + return parent.is_in_non_fully_specialized_template(); + } + + true + } + + /// Is the referent any kind of template parameter? + pub(crate) fn is_template_parameter(&self) -> bool { + matches!( + self.kind(), + CXCursor_TemplateTemplateParameter | + CXCursor_TemplateTypeParameter | + CXCursor_NonTypeTemplateParameter + ) + } + + /// Does the referent's type or value depend on a template parameter? 
+ pub(crate) fn is_dependent_on_template_parameter(&self) -> bool { + fn visitor( + found_template_parameter: &mut bool, + cur: Cursor, + ) -> CXChildVisitResult { + // If we found a template parameter, it is dependent. + if cur.is_template_parameter() { + *found_template_parameter = true; + return CXChildVisit_Break; + } + + // Get the referent and traverse it as well. + if let Some(referenced) = cur.referenced() { + if referenced.is_template_parameter() { + *found_template_parameter = true; + return CXChildVisit_Break; + } + + referenced + .visit(|next| visitor(found_template_parameter, next)); + if *found_template_parameter { + return CXChildVisit_Break; + } + } + + // Continue traversing the AST at the original cursor. + CXChildVisit_Recurse + } + + if self.is_template_parameter() { + return true; + } + + let mut found_template_parameter = false; + self.visit(|next| visitor(&mut found_template_parameter, next)); + + found_template_parameter + } + + /// Is this cursor pointing a valid referent? + pub(crate) fn is_valid(&self) -> bool { + unsafe { clang_isInvalid(self.kind()) == 0 } + } + + /// Get the source location for the referent. + pub(crate) fn location(&self) -> SourceLocation { + unsafe { + SourceLocation { + x: clang_getCursorLocation(self.x), + } + } + } + + /// Get the source location range for the referent. + pub(crate) fn extent(&self) -> CXSourceRange { + unsafe { clang_getCursorExtent(self.x) } + } + + /// Get the raw declaration comment for this referent, if one exists. + pub(crate) fn raw_comment(&self) -> Option { + let s = unsafe { + cxstring_into_string(clang_Cursor_getRawCommentText(self.x)) + }; + if s.is_empty() { + None + } else { + Some(s) + } + } + + /// Get the referent's parsed comment. + pub(crate) fn comment(&self) -> Comment { + unsafe { + Comment { + x: clang_Cursor_getParsedComment(self.x), + } + } + } + + /// Get the referent's type. 
+ pub(crate) fn cur_type(&self) -> Type { + unsafe { + Type { + x: clang_getCursorType(self.x), + } + } + } + + /// Given that this cursor's referent is a reference to another type, or is + /// a declaration, get the cursor pointing to the referenced type or type of + /// the declared thing. + pub(crate) fn definition(&self) -> Option { + unsafe { + let ret = Cursor { + x: clang_getCursorDefinition(self.x), + }; + + if ret.is_valid() && ret.kind() != CXCursor_NoDeclFound { + Some(ret) + } else { + None + } + } + } + + /// Given that this cursor's referent is reference type, get the cursor + /// pointing to the referenced type. + pub(crate) fn referenced(&self) -> Option { + unsafe { + let ret = Cursor { + x: clang_getCursorReferenced(self.x), + }; + + if ret.is_valid() { + Some(ret) + } else { + None + } + } + } + + /// Get the canonical cursor for this referent. + /// + /// Many types can be declared multiple times before finally being properly + /// defined. This method allows us to get the canonical cursor for the + /// referent type. + pub(crate) fn canonical(&self) -> Cursor { + unsafe { + Cursor { + x: clang_getCanonicalCursor(self.x), + } + } + } + + /// Given that this cursor points to either a template specialization or a + /// template instantiation, get a cursor pointing to the template definition + /// that is being specialized. + pub(crate) fn specialized(&self) -> Option { + unsafe { + let ret = Cursor { + x: clang_getSpecializedCursorTemplate(self.x), + }; + if ret.is_valid() { + Some(ret) + } else { + None + } + } + } + + /// Assuming that this cursor's referent is a template declaration, get the + /// kind of cursor that would be generated for its specializations. + pub(crate) fn template_kind(&self) -> CXCursorKind { + unsafe { clang_getTemplateCursorKind(self.x) } + } + + /// Traverse this cursor's referent and its children. + /// + /// Call the given function on each AST node traversed. 
+ pub(crate) fn visit(&self, mut visitor: Visitor) + where + Visitor: FnMut(Cursor) -> CXChildVisitResult, + { + let data = &mut visitor as *mut Visitor; + unsafe { + clang_visitChildren(self.x, visit_children::, data.cast()); + } + } + + /// Collect all of this cursor's children into a vec and return them. + pub(crate) fn collect_children(&self) -> Vec { + let mut children = vec![]; + self.visit(|c| { + children.push(c); + CXChildVisit_Continue + }); + children + } + + /// Does this cursor have any children? + pub(crate) fn has_children(&self) -> bool { + let mut has_children = false; + self.visit(|_| { + has_children = true; + CXChildVisit_Break + }); + has_children + } + + /// Does this cursor have at least `n` children? + pub(crate) fn has_at_least_num_children(&self, n: usize) -> bool { + assert!(n > 0); + let mut num_left = n; + self.visit(|_| { + num_left -= 1; + if num_left == 0 { + CXChildVisit_Break + } else { + CXChildVisit_Continue + } + }); + num_left == 0 + } + + /// Returns whether the given location contains a cursor with the given + /// kind in the first level of nesting underneath (doesn't look + /// recursively). + pub(crate) fn contains_cursor(&self, kind: CXCursorKind) -> bool { + let mut found = false; + + self.visit(|c| { + if c.kind() == kind { + found = true; + CXChildVisit_Break + } else { + CXChildVisit_Continue + } + }); + + found + } + + /// Is the referent an inlined function? + pub(crate) fn is_inlined_function(&self) -> bool { + unsafe { clang_Cursor_isFunctionInlined(self.x) != 0 } + } + + /// Is the referent a defaulted function? + pub(crate) fn is_defaulted_function(&self) -> bool { + unsafe { clang_CXXMethod_isDefaulted(self.x) != 0 } + } + + /// Is the referent a deleted function? + pub(crate) fn is_deleted_function(&self) -> bool { + // Unfortunately, libclang doesn't yet have an API for checking if a + // member function is deleted, but the following should be a good + // enough approximation. 
+ // Deleted functions are implicitly inline according to paragraph 4 of + // [dcl.fct.def.delete] in the C++ standard. Normal inline functions + // have a definition in the same translation unit, so if this is an + // inline function without a definition, and it's not a defaulted + // function, we can reasonably safely conclude that it's a deleted + // function. + self.is_inlined_function() && + self.definition().is_none() && + !self.is_defaulted_function() + } + + /// Is the referent a bit field declaration? + pub(crate) fn is_bit_field(&self) -> bool { + unsafe { clang_Cursor_isBitField(self.x) != 0 } + } + + /// Get a cursor to the bit field's width expression, or `None` if it's not + /// a bit field. + pub(crate) fn bit_width_expr(&self) -> Option { + if !self.is_bit_field() { + return None; + } + + let mut result = None; + self.visit(|cur| { + // The first child may or may not be a TypeRef, depending on whether + // the field's type is builtin. Skip it. + if cur.kind() == CXCursor_TypeRef { + return CXChildVisit_Continue; + } + + // The next expression or literal is the bit width. + result = Some(cur); + + CXChildVisit_Break + }); + + result + } + + /// Get the width of this cursor's referent bit field, or `None` if the + /// referent is not a bit field or if the width could not be evaluated. + pub(crate) fn bit_width(&self) -> Option { + // It is not safe to check the bit width without ensuring it doesn't + // depend on a template parameter. See + // https://github.com/rust-lang/rust-bindgen/issues/2239 + if self.bit_width_expr()?.is_dependent_on_template_parameter() { + return None; + } + + unsafe { + let w = clang_getFieldDeclBitWidth(self.x); + if w == -1 { + None + } else { + Some(w as u32) + } + } + } + + /// Get the integer representation type used to hold this cursor's referent + /// enum type. 
+ pub(crate) fn enum_type(&self) -> Option { + unsafe { + let t = Type { + x: clang_getEnumDeclIntegerType(self.x), + }; + if t.is_valid() { + Some(t) + } else { + None + } + } + } + + /// Get the boolean constant value for this cursor's enum variant referent. + /// + /// Returns None if the cursor's referent is not an enum variant. + pub(crate) fn enum_val_boolean(&self) -> Option { + unsafe { + if self.kind() == CXCursor_EnumConstantDecl { + Some(clang_getEnumConstantDeclValue(self.x) != 0) + } else { + None + } + } + } + + /// Get the signed constant value for this cursor's enum variant referent. + /// + /// Returns None if the cursor's referent is not an enum variant. + pub(crate) fn enum_val_signed(&self) -> Option { + unsafe { + if self.kind() == CXCursor_EnumConstantDecl { + #[allow(clippy::unnecessary_cast)] + Some(clang_getEnumConstantDeclValue(self.x) as i64) + } else { + None + } + } + } + + /// Get the unsigned constant value for this cursor's enum variant referent. + /// + /// Returns None if the cursor's referent is not an enum variant. + pub(crate) fn enum_val_unsigned(&self) -> Option { + unsafe { + if self.kind() == CXCursor_EnumConstantDecl { + #[allow(clippy::unnecessary_cast)] + Some(clang_getEnumConstantDeclUnsignedValue(self.x) as u64) + } else { + None + } + } + } + + /// Does this cursor have the given attributes? + pub(crate) fn has_attrs( + &self, + attrs: &[Attribute; N], + ) -> [bool; N] { + let mut found_attrs = [false; N]; + let mut found_count = 0; + + self.visit(|cur| { + let kind = cur.kind(); + for (idx, attr) in attrs.iter().enumerate() { + let found_attr = &mut found_attrs[idx]; + if !*found_attr { + // `attr.name` and` attr.token_kind` are checked against unexposed attributes only. 
+ if attr.kind.map_or(false, |k| k == kind) || + (kind == CXCursor_UnexposedAttr && + cur.tokens().iter().any(|t| { + t.kind == attr.token_kind && + t.spelling() == attr.name + })) + { + *found_attr = true; + found_count += 1; + + if found_count == N { + return CXChildVisit_Break; + } + } + } + } + + CXChildVisit_Continue + }); + + found_attrs + } + + /// Given that this cursor's referent is a `typedef`, get the `Type` that is + /// being aliased. + pub(crate) fn typedef_type(&self) -> Option { + let inner = Type { + x: unsafe { clang_getTypedefDeclUnderlyingType(self.x) }, + }; + + if inner.is_valid() { + Some(inner) + } else { + None + } + } + + /// Get the linkage kind for this cursor's referent. + /// + /// This only applies to functions and variables. + pub(crate) fn linkage(&self) -> CXLinkageKind { + unsafe { clang_getCursorLinkage(self.x) } + } + + /// Get the visibility of this cursor's referent. + pub(crate) fn visibility(&self) -> CXVisibilityKind { + unsafe { clang_getCursorVisibility(self.x) } + } + + /// Given that this cursor's referent is a function, return cursors to its + /// parameters. + /// + /// Returns None if the cursor's referent is not a function/method call or + /// declaration. + pub(crate) fn args(&self) -> Option> { + // match self.kind() { + // CXCursor_FunctionDecl | + // CXCursor_CXXMethod => { + self.num_args().ok().map(|num| { + (0..num) + .map(|i| Cursor { + x: unsafe { clang_Cursor_getArgument(self.x, i as c_uint) }, + }) + .collect() + }) + } + + /// Given that this cursor's referent is a function/method call or + /// declaration, return the number of arguments it takes. + /// + /// Returns Err if the cursor's referent is not a function/method call or + /// declaration. + pub(crate) fn num_args(&self) -> Result { + unsafe { + let w = clang_Cursor_getNumArguments(self.x); + if w == -1 { + Err(()) + } else { + Ok(w as u32) + } + } + } + + /// Get the access specifier for this cursor's referent. 
+ pub(crate) fn access_specifier(&self) -> CX_CXXAccessSpecifier { + unsafe { clang_getCXXAccessSpecifier(self.x) } + } + + /// Is the cursor's referrent publically accessible in C++? + /// + /// Returns true if self.access_specifier() is `CX_CXXPublic` or + /// `CX_CXXInvalidAccessSpecifier`. + pub(crate) fn public_accessible(&self) -> bool { + let access = self.access_specifier(); + access == CX_CXXPublic || access == CX_CXXInvalidAccessSpecifier + } + + /// Is this cursor's referent a field declaration that is marked as + /// `mutable`? + pub(crate) fn is_mutable_field(&self) -> bool { + unsafe { clang_CXXField_isMutable(self.x) != 0 } + } + + /// Get the offset of the field represented by the Cursor. + pub(crate) fn offset_of_field(&self) -> Result { + let offset = unsafe { clang_Cursor_getOffsetOfField(self.x) }; + + if offset < 0 { + Err(LayoutError::from(offset as i32)) + } else { + Ok(offset as usize) + } + } + + /// Is this cursor's referent a member function that is declared `static`? + pub(crate) fn method_is_static(&self) -> bool { + unsafe { clang_CXXMethod_isStatic(self.x) != 0 } + } + + /// Is this cursor's referent a member function that is declared `const`? + pub(crate) fn method_is_const(&self) -> bool { + unsafe { clang_CXXMethod_isConst(self.x) != 0 } + } + + /// Is this cursor's referent a member function that is virtual? + pub(crate) fn method_is_virtual(&self) -> bool { + unsafe { clang_CXXMethod_isVirtual(self.x) != 0 } + } + + /// Is this cursor's referent a member function that is pure virtual? + pub(crate) fn method_is_pure_virtual(&self) -> bool { + unsafe { clang_CXXMethod_isPureVirtual(self.x) != 0 } + } + + /// Is this cursor's referent a struct or class with virtual members? + pub(crate) fn is_virtual_base(&self) -> bool { + unsafe { clang_isVirtualBase(self.x) != 0 } + } + + /// Try to evaluate this cursor. 
+ pub(crate) fn evaluate(&self) -> Option { + EvalResult::new(*self) + } + + /// Return the result type for this cursor + pub(crate) fn ret_type(&self) -> Option { + let rt = Type { + x: unsafe { clang_getCursorResultType(self.x) }, + }; + if rt.is_valid() { + Some(rt) + } else { + None + } + } + + /// Gets the tokens that correspond to that cursor. + pub(crate) fn tokens(&self) -> RawTokens { + RawTokens::new(self) + } + + /// Gets the tokens that correspond to that cursor as `cexpr` tokens. + pub(crate) fn cexpr_tokens(self) -> Vec { + self.tokens() + .iter() + .filter_map(|token| token.as_cexpr_token()) + .collect() + } + + /// Obtain the real path name of a cursor of InclusionDirective kind. + /// + /// Returns None if the cursor does not include a file, otherwise the file's full name + pub(crate) fn get_included_file_name(&self) -> Option { + let file = unsafe { clang_sys::clang_getIncludedFile(self.x) }; + if file.is_null() { + None + } else { + Some(unsafe { + cxstring_into_string(clang_sys::clang_getFileName(file)) + }) + } + } +} + +/// A struct that owns the tokenizer result from a given cursor. +pub(crate) struct RawTokens<'a> { + cursor: &'a Cursor, + tu: CXTranslationUnit, + tokens: *mut CXToken, + token_count: c_uint, +} + +impl<'a> RawTokens<'a> { + fn new(cursor: &'a Cursor) -> Self { + let mut tokens = ptr::null_mut(); + let mut token_count = 0; + let range = cursor.extent(); + let tu = unsafe { clang_Cursor_getTranslationUnit(cursor.x) }; + unsafe { clang_tokenize(tu, range, &mut tokens, &mut token_count) }; + Self { + cursor, + tu, + tokens, + token_count, + } + } + + fn as_slice(&self) -> &[CXToken] { + if self.tokens.is_null() { + return &[]; + } + unsafe { slice::from_raw_parts(self.tokens, self.token_count as usize) } + } + + /// Get an iterator over these tokens. 
+ pub(crate) fn iter(&self) -> ClangTokenIterator { + ClangTokenIterator { + tu: self.tu, + raw: self.as_slice().iter(), + } + } +} + +impl<'a> Drop for RawTokens<'a> { + fn drop(&mut self) { + if !self.tokens.is_null() { + unsafe { + clang_disposeTokens( + self.tu, + self.tokens, + self.token_count as c_uint, + ); + } + } + } +} + +/// A raw clang token, that exposes only kind, spelling, and extent. This is a +/// slightly more convenient version of `CXToken` which owns the spelling +/// string and extent. +#[derive(Debug)] +pub(crate) struct ClangToken { + spelling: CXString, + /// The extent of the token. This is the same as the relevant member from + /// `CXToken`. + pub(crate) extent: CXSourceRange, + /// The kind of the token. This is the same as the relevant member from + /// `CXToken`. + pub(crate) kind: CXTokenKind, +} + +impl ClangToken { + /// Get the token spelling, without being converted to utf-8. + pub(crate) fn spelling(&self) -> &[u8] { + let c_str = unsafe { + CStr::from_ptr(clang_getCString(self.spelling) as *const _) + }; + c_str.to_bytes() + } + + /// Converts a ClangToken to a `cexpr` token if possible. + pub(crate) fn as_cexpr_token(&self) -> Option { + use cexpr::token; + + let kind = match self.kind { + CXToken_Punctuation => token::Kind::Punctuation, + CXToken_Literal => token::Kind::Literal, + CXToken_Identifier => token::Kind::Identifier, + CXToken_Keyword => token::Kind::Keyword, + // NB: cexpr is not too happy about comments inside + // expressions, so we strip them down here. + CXToken_Comment => return None, + _ => { + warn!("Found unexpected token kind: {:?}", self); + return None; + } + }; + + Some(token::Token { + kind, + raw: self.spelling().to_vec().into_boxed_slice(), + }) + } +} + +impl Drop for ClangToken { + fn drop(&mut self) { + unsafe { clang_disposeString(self.spelling) } + } +} + +/// An iterator over a set of Tokens. 
+pub(crate) struct ClangTokenIterator<'a> { + tu: CXTranslationUnit, + raw: slice::Iter<'a, CXToken>, +} + +impl<'a> Iterator for ClangTokenIterator<'a> { + type Item = ClangToken; + + fn next(&mut self) -> Option { + let raw = self.raw.next()?; + unsafe { + let kind = clang_getTokenKind(*raw); + let spelling = clang_getTokenSpelling(self.tu, *raw); + let extent = clang_getTokenExtent(self.tu, *raw); + Some(ClangToken { + kind, + extent, + spelling, + }) + } + } +} + +/// Checks whether the name looks like an identifier, i.e. is alphanumeric +/// (including '_') and does not start with a digit. +pub(crate) fn is_valid_identifier(name: &str) -> bool { + let mut chars = name.chars(); + let first_valid = chars + .next() + .map(|c| c.is_alphabetic() || c == '_') + .unwrap_or(false); + + first_valid && chars.all(|c| c.is_alphanumeric() || c == '_') +} + +extern "C" fn visit_children( + cur: CXCursor, + _parent: CXCursor, + data: CXClientData, +) -> CXChildVisitResult +where + Visitor: FnMut(Cursor) -> CXChildVisitResult, +{ + let func: &mut Visitor = unsafe { &mut *(data as *mut Visitor) }; + let child = Cursor { x: cur }; + + (*func)(child) +} + +impl PartialEq for Cursor { + fn eq(&self, other: &Cursor) -> bool { + unsafe { clang_equalCursors(self.x, other.x) == 1 } + } +} + +impl Eq for Cursor {} + +impl Hash for Cursor { + fn hash(&self, state: &mut H) { + unsafe { clang_hashCursor(self.x) }.hash(state) + } +} + +/// The type of a node in clang's AST. 
+#[derive(Clone, Copy)] +pub(crate) struct Type { + x: CXType, +} + +impl PartialEq for Type { + fn eq(&self, other: &Self) -> bool { + unsafe { clang_equalTypes(self.x, other.x) != 0 } + } +} + +impl Eq for Type {} + +impl fmt::Debug for Type { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!( + fmt, + "Type({}, kind: {}, cconv: {}, decl: {:?}, canon: {:?})", + self.spelling(), + type_to_str(self.kind()), + self.call_conv(), + self.declaration(), + self.declaration().canonical() + ) + } +} + +/// An error about the layout of a struct, class, or type. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] +pub(crate) enum LayoutError { + /// Asked for the layout of an invalid type. + Invalid, + /// Asked for the layout of an incomplete type. + Incomplete, + /// Asked for the layout of a dependent type. + Dependent, + /// Asked for the layout of a type that does not have constant size. + NotConstantSize, + /// Asked for the layout of a field in a type that does not have such a + /// field. + InvalidFieldName, + /// An unknown layout error. + Unknown, +} + +impl ::std::convert::From for LayoutError { + fn from(val: i32) -> Self { + use self::LayoutError::*; + + match val { + CXTypeLayoutError_Invalid => Invalid, + CXTypeLayoutError_Incomplete => Incomplete, + CXTypeLayoutError_Dependent => Dependent, + CXTypeLayoutError_NotConstantSize => NotConstantSize, + CXTypeLayoutError_InvalidFieldName => InvalidFieldName, + _ => Unknown, + } + } +} + +impl Type { + /// Get this type's kind. + pub(crate) fn kind(&self) -> CXTypeKind { + self.x.kind + } + + /// Get a cursor pointing to this type's declaration. + pub(crate) fn declaration(&self) -> Cursor { + unsafe { + Cursor { + x: clang_getTypeDeclaration(self.x), + } + } + } + + /// Get the canonical declaration of this type, if it is available. 
+ pub(crate) fn canonical_declaration( + &self, + location: Option<&Cursor>, + ) -> Option { + let mut declaration = self.declaration(); + if !declaration.is_valid() { + if let Some(location) = location { + let mut location = *location; + if let Some(referenced) = location.referenced() { + location = referenced; + } + if location.is_template_like() { + declaration = location; + } + } + } + + let canonical = declaration.canonical(); + if canonical.is_valid() && canonical.kind() != CXCursor_NoDeclFound { + Some(CanonicalTypeDeclaration(*self, canonical)) + } else { + None + } + } + + /// Get a raw display name for this type. + pub(crate) fn spelling(&self) -> String { + let s = unsafe { cxstring_into_string(clang_getTypeSpelling(self.x)) }; + // Clang 5.0 introduced changes in the spelling API so it returned the + // full qualified name. Let's undo that here. + if s.split("::").all(is_valid_identifier) { + if let Some(s) = s.split("::").last() { + return s.to_owned(); + } + } + + s + } + + /// Is this type const qualified? 
+ pub(crate) fn is_const(&self) -> bool { + unsafe { clang_isConstQualifiedType(self.x) != 0 } + } + + #[inline] + fn is_non_deductible_auto_type(&self) -> bool { + debug_assert_eq!(self.kind(), CXType_Auto); + self.canonical_type() == *self + } + + #[inline] + fn clang_size_of(&self, ctx: &BindgenContext) -> c_longlong { + match self.kind() { + // Work-around https://bugs.llvm.org/show_bug.cgi?id=40975 + CXType_RValueReference | CXType_LValueReference => { + ctx.target_pointer_size() as c_longlong + } + // Work-around https://bugs.llvm.org/show_bug.cgi?id=40813 + CXType_Auto if self.is_non_deductible_auto_type() => -6, + _ => unsafe { clang_Type_getSizeOf(self.x) }, + } + } + + #[inline] + fn clang_align_of(&self, ctx: &BindgenContext) -> c_longlong { + match self.kind() { + // Work-around https://bugs.llvm.org/show_bug.cgi?id=40975 + CXType_RValueReference | CXType_LValueReference => { + ctx.target_pointer_size() as c_longlong + } + // Work-around https://bugs.llvm.org/show_bug.cgi?id=40813 + CXType_Auto if self.is_non_deductible_auto_type() => -6, + _ => unsafe { clang_Type_getAlignOf(self.x) }, + } + } + + /// What is the size of this type? Paper over invalid types by returning `0` + /// for them. + pub(crate) fn size(&self, ctx: &BindgenContext) -> usize { + let val = self.clang_size_of(ctx); + if val < 0 { + 0 + } else { + val as usize + } + } + + /// What is the size of this type? + pub(crate) fn fallible_size( + &self, + ctx: &BindgenContext, + ) -> Result { + let val = self.clang_size_of(ctx); + if val < 0 { + Err(LayoutError::from(val as i32)) + } else { + Ok(val as usize) + } + } + + /// What is the alignment of this type? Paper over invalid types by + /// returning `0`. + pub(crate) fn align(&self, ctx: &BindgenContext) -> usize { + let val = self.clang_align_of(ctx); + if val < 0 { + 0 + } else { + val as usize + } + } + + /// What is the alignment of this type? 
+ pub(crate) fn fallible_align( + &self, + ctx: &BindgenContext, + ) -> Result { + let val = self.clang_align_of(ctx); + if val < 0 { + Err(LayoutError::from(val as i32)) + } else { + Ok(val as usize) + } + } + + /// Get the layout for this type, or an error describing why it does not + /// have a valid layout. + pub(crate) fn fallible_layout( + &self, + ctx: &BindgenContext, + ) -> Result { + use crate::ir::layout::Layout; + let size = self.fallible_size(ctx)?; + let align = self.fallible_align(ctx)?; + Ok(Layout::new(size, align)) + } + + /// Get the number of template arguments this type has, or `None` if it is + /// not some kind of template. + pub(crate) fn num_template_args(&self) -> Option { + let n = unsafe { clang_Type_getNumTemplateArguments(self.x) }; + if n >= 0 { + Some(n as u32) + } else { + debug_assert_eq!(n, -1); + None + } + } + + /// If this type is a class template specialization, return its + /// template arguments. Otherwise, return None. + pub(crate) fn template_args(&self) -> Option { + self.num_template_args().map(|n| TypeTemplateArgIterator { + x: self.x, + length: n, + index: 0, + }) + } + + /// Given that this type is a function prototype, return the types of its parameters. + /// + /// Returns None if the type is not a function prototype. + pub(crate) fn args(&self) -> Option> { + self.num_args().ok().map(|num| { + (0..num) + .map(|i| Type { + x: unsafe { clang_getArgType(self.x, i as c_uint) }, + }) + .collect() + }) + } + + /// Given that this type is a function prototype, return the number of arguments it takes. + /// + /// Returns Err if the type is not a function prototype. + pub(crate) fn num_args(&self) -> Result { + unsafe { + let w = clang_getNumArgTypes(self.x); + if w == -1 { + Err(()) + } else { + Ok(w as u32) + } + } + } + + /// Given that this type is a pointer type, return the type that it points + /// to. 
+ pub(crate) fn pointee_type(&self) -> Option { + match self.kind() { + CXType_Pointer | + CXType_RValueReference | + CXType_LValueReference | + CXType_MemberPointer | + CXType_BlockPointer | + CXType_ObjCObjectPointer => { + let ret = Type { + x: unsafe { clang_getPointeeType(self.x) }, + }; + debug_assert!(ret.is_valid()); + Some(ret) + } + _ => None, + } + } + + /// Given that this type is an array, vector, or complex type, return the + /// type of its elements. + pub(crate) fn elem_type(&self) -> Option { + let current_type = Type { + x: unsafe { clang_getElementType(self.x) }, + }; + if current_type.is_valid() { + Some(current_type) + } else { + None + } + } + + /// Given that this type is an array or vector type, return its number of + /// elements. + pub(crate) fn num_elements(&self) -> Option { + let num_elements_returned = unsafe { clang_getNumElements(self.x) }; + if num_elements_returned != -1 { + Some(num_elements_returned as usize) + } else { + None + } + } + + /// Get the canonical version of this type. This sees through `typedef`s and + /// aliases to get the underlying, canonical type. + pub(crate) fn canonical_type(&self) -> Type { + unsafe { + Type { + x: clang_getCanonicalType(self.x), + } + } + } + + /// Is this type a variadic function type? + pub(crate) fn is_variadic(&self) -> bool { + unsafe { clang_isFunctionTypeVariadic(self.x) != 0 } + } + + /// Given that this type is a function type, get the type of its return + /// value. + pub(crate) fn ret_type(&self) -> Option { + let rt = Type { + x: unsafe { clang_getResultType(self.x) }, + }; + if rt.is_valid() { + Some(rt) + } else { + None + } + } + + /// Given that this type is a function type, get its calling convention. If + /// this is not a function type, `CXCallingConv_Invalid` is returned. 
+ pub(crate) fn call_conv(&self) -> CXCallingConv { + unsafe { clang_getFunctionTypeCallingConv(self.x) } + } + + /// For elaborated types (types which use `class`, `struct`, or `union` to + /// disambiguate types from local bindings), get the underlying type. + pub(crate) fn named(&self) -> Type { + unsafe { + Type { + x: clang_Type_getNamedType(self.x), + } + } + } + + /// Is this a valid type? + pub(crate) fn is_valid(&self) -> bool { + self.kind() != CXType_Invalid + } + + /// Is this a valid and exposed type? + pub(crate) fn is_valid_and_exposed(&self) -> bool { + self.is_valid() && self.kind() != CXType_Unexposed + } + + /// Is this type a fully instantiated template? + pub(crate) fn is_fully_instantiated_template(&self) -> bool { + // Yep, the spelling of this containing type-parameter is extremely + // nasty... But can happen in . Unfortunately I couldn't + // reduce it enough :( + self.template_args().map_or(false, |args| args.len() > 0) && + !matches!( + self.declaration().kind(), + CXCursor_ClassTemplatePartialSpecialization | + CXCursor_TypeAliasTemplateDecl | + CXCursor_TemplateTemplateParameter + ) + } + + /// Is this type an associated template type? Eg `T::Associated` in + /// this example: + /// + /// ```c++ + /// template + /// class Foo { + /// typename T::Associated member; + /// }; + /// ``` + pub(crate) fn is_associated_type(&self) -> bool { + // This is terrible :( + fn hacky_parse_associated_type>(spelling: S) -> bool { + lazy_static! { + static ref ASSOC_TYPE_RE: regex::Regex = regex::Regex::new( + r"typename type\-parameter\-\d+\-\d+::.+" + ) + .unwrap(); + } + ASSOC_TYPE_RE.is_match(spelling.as_ref()) + } + + self.kind() == CXType_Unexposed && + (hacky_parse_associated_type(self.spelling()) || + hacky_parse_associated_type( + self.canonical_type().spelling(), + )) + } +} + +/// The `CanonicalTypeDeclaration` type exists as proof-by-construction that its +/// cursor is the canonical declaration for its type. 
If you have a +/// `CanonicalTypeDeclaration` instance, you know for sure that the type and +/// cursor match up in a canonical declaration relationship, and it simply +/// cannot be otherwise. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) struct CanonicalTypeDeclaration(Type, Cursor); + +impl CanonicalTypeDeclaration { + /// Get the type. + pub(crate) fn ty(&self) -> &Type { + &self.0 + } + + /// Get the type's canonical declaration cursor. + pub(crate) fn cursor(&self) -> &Cursor { + &self.1 + } +} + +/// An iterator for a type's template arguments. +pub(crate) struct TypeTemplateArgIterator { + x: CXType, + length: u32, + index: u32, +} + +impl Iterator for TypeTemplateArgIterator { + type Item = Type; + fn next(&mut self) -> Option { + if self.index < self.length { + let idx = self.index as c_uint; + self.index += 1; + Some(Type { + x: unsafe { clang_Type_getTemplateArgumentAsType(self.x, idx) }, + }) + } else { + None + } + } +} + +impl ExactSizeIterator for TypeTemplateArgIterator { + fn len(&self) -> usize { + assert!(self.index <= self.length); + (self.length - self.index) as usize + } +} + +/// A `SourceLocation` is a file, line, column, and byte offset location for +/// some source text. +pub(crate) struct SourceLocation { + x: CXSourceLocation, +} + +impl SourceLocation { + /// Get the (file, line, column, byte offset) tuple for this source + /// location. 
+ pub(crate) fn location(&self) -> (File, usize, usize, usize) { + unsafe { + let mut file = mem::zeroed(); + let mut line = 0; + let mut col = 0; + let mut off = 0; + clang_getSpellingLocation( + self.x, &mut file, &mut line, &mut col, &mut off, + ); + (File { x: file }, line as usize, col as usize, off as usize) + } + } +} + +impl fmt::Display for SourceLocation { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let (file, line, col, _) = self.location(); + if let Some(name) = file.name() { + write!(f, "{}:{}:{}", name, line, col) + } else { + "builtin definitions".fmt(f) + } + } +} + +impl fmt::Debug for SourceLocation { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self) + } +} + +/// A comment in the source text. +/// +/// Comments are sort of parsed by Clang, and have a tree structure. +pub(crate) struct Comment { + x: CXComment, +} + +impl Comment { + /// What kind of comment is this? + pub(crate) fn kind(&self) -> CXCommentKind { + unsafe { clang_Comment_getKind(self.x) } + } + + /// Get this comment's children comment + pub(crate) fn get_children(&self) -> CommentChildrenIterator { + CommentChildrenIterator { + parent: self.x, + length: unsafe { clang_Comment_getNumChildren(self.x) }, + index: 0, + } + } + + /// Given that this comment is the start or end of an HTML tag, get its tag + /// name. + pub(crate) fn get_tag_name(&self) -> String { + unsafe { cxstring_into_string(clang_HTMLTagComment_getTagName(self.x)) } + } + + /// Given that this comment is an HTML start tag, get its attributes. 
+ pub(crate) fn get_tag_attrs(&self) -> CommentAttributesIterator { + CommentAttributesIterator { + x: self.x, + length: unsafe { clang_HTMLStartTag_getNumAttrs(self.x) }, + index: 0, + } + } +} + +/// An iterator for a comment's children +pub(crate) struct CommentChildrenIterator { + parent: CXComment, + length: c_uint, + index: c_uint, +} + +impl Iterator for CommentChildrenIterator { + type Item = Comment; + fn next(&mut self) -> Option { + if self.index < self.length { + let idx = self.index; + self.index += 1; + Some(Comment { + x: unsafe { clang_Comment_getChild(self.parent, idx) }, + }) + } else { + None + } + } +} + +/// An HTML start tag comment attribute +pub(crate) struct CommentAttribute { + /// HTML start tag attribute name + pub(crate) name: String, + /// HTML start tag attribute value + pub(crate) value: String, +} + +/// An iterator for a comment's attributes +pub(crate) struct CommentAttributesIterator { + x: CXComment, + length: c_uint, + index: c_uint, +} + +impl Iterator for CommentAttributesIterator { + type Item = CommentAttribute; + fn next(&mut self) -> Option { + if self.index < self.length { + let idx = self.index; + self.index += 1; + Some(CommentAttribute { + name: unsafe { + cxstring_into_string(clang_HTMLStartTag_getAttrName( + self.x, idx, + )) + }, + value: unsafe { + cxstring_into_string(clang_HTMLStartTag_getAttrValue( + self.x, idx, + )) + }, + }) + } else { + None + } + } +} + +/// A source file. +pub(crate) struct File { + x: CXFile, +} + +impl File { + /// Get the name of this source file. 
+ pub(crate) fn name(&self) -> Option { + if self.x.is_null() { + return None; + } + Some(unsafe { cxstring_into_string(clang_getFileName(self.x)) }) + } +} + +fn cxstring_to_string_leaky(s: CXString) -> String { + if s.data.is_null() { + return "".to_owned(); + } + let c_str = unsafe { CStr::from_ptr(clang_getCString(s) as *const _) }; + c_str.to_string_lossy().into_owned() +} + +fn cxstring_into_string(s: CXString) -> String { + let ret = cxstring_to_string_leaky(s); + unsafe { clang_disposeString(s) }; + ret +} + +/// An `Index` is an environment for a set of translation units that will +/// typically end up linked together in one final binary. +pub(crate) struct Index { + x: CXIndex, +} + +impl Index { + /// Construct a new `Index`. + /// + /// The `pch` parameter controls whether declarations in pre-compiled + /// headers are included when enumerating a translation unit's "locals". + /// + /// The `diag` parameter controls whether debugging diagnostics are enabled. + pub(crate) fn new(pch: bool, diag: bool) -> Index { + unsafe { + Index { + x: clang_createIndex(pch as c_int, diag as c_int), + } + } + } +} + +impl fmt::Debug for Index { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!(fmt, "Index {{ }}") + } +} + +impl Drop for Index { + fn drop(&mut self) { + unsafe { + clang_disposeIndex(self.x); + } + } +} + +/// A translation unit (or "compilation unit"). +pub(crate) struct TranslationUnit { + x: CXTranslationUnit, +} + +impl fmt::Debug for TranslationUnit { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!(fmt, "TranslationUnit {{ }}") + } +} + +impl TranslationUnit { + /// Parse a source file into a translation unit. 
+ pub(crate) fn parse( + ix: &Index, + file: &str, + cmd_args: &[String], + unsaved: &[UnsavedFile], + opts: CXTranslationUnit_Flags, + ) -> Option { + let fname = CString::new(file).unwrap(); + let _c_args: Vec = cmd_args + .iter() + .map(|s| CString::new(s.clone()).unwrap()) + .collect(); + let c_args: Vec<*const c_char> = + _c_args.iter().map(|s| s.as_ptr()).collect(); + let mut c_unsaved: Vec = + unsaved.iter().map(|f| f.x).collect(); + let tu = unsafe { + clang_parseTranslationUnit( + ix.x, + fname.as_ptr(), + c_args.as_ptr(), + c_args.len() as c_int, + c_unsaved.as_mut_ptr(), + c_unsaved.len() as c_uint, + opts, + ) + }; + if tu.is_null() { + None + } else { + Some(TranslationUnit { x: tu }) + } + } + + /// Get the Clang diagnostic information associated with this translation + /// unit. + pub(crate) fn diags(&self) -> Vec { + unsafe { + let num = clang_getNumDiagnostics(self.x) as usize; + let mut diags = vec![]; + for i in 0..num { + diags.push(Diagnostic { + x: clang_getDiagnostic(self.x, i as c_uint), + }); + } + diags + } + } + + /// Get a cursor pointing to the root of this translation unit's AST. + pub(crate) fn cursor(&self) -> Cursor { + unsafe { + Cursor { + x: clang_getTranslationUnitCursor(self.x), + } + } + } + + /// Is this the null translation unit? + pub(crate) fn is_null(&self) -> bool { + self.x.is_null() + } +} + +impl Drop for TranslationUnit { + fn drop(&mut self) { + unsafe { + clang_disposeTranslationUnit(self.x); + } + } +} + +/// A diagnostic message generated while parsing a translation unit. +pub(crate) struct Diagnostic { + x: CXDiagnostic, +} + +impl Diagnostic { + /// Format this diagnostic message as a string, using the given option bit + /// flags. + pub(crate) fn format(&self) -> String { + unsafe { + let opts = clang_defaultDiagnosticDisplayOptions(); + cxstring_into_string(clang_formatDiagnostic(self.x, opts)) + } + } + + /// What is the severity of this diagnostic message? 
+ pub(crate) fn severity(&self) -> CXDiagnosticSeverity { + unsafe { clang_getDiagnosticSeverity(self.x) } + } +} + +impl Drop for Diagnostic { + /// Destroy this diagnostic message. + fn drop(&mut self) { + unsafe { + clang_disposeDiagnostic(self.x); + } + } +} + +/// A file which has not been saved to disk. +pub(crate) struct UnsavedFile { + x: CXUnsavedFile, + /// The name of the unsaved file. Kept here to avoid leaving dangling pointers in + /// `CXUnsavedFile`. + pub(crate) name: CString, + contents: CString, +} + +impl UnsavedFile { + /// Construct a new unsaved file with the given `name` and `contents`. + pub(crate) fn new(name: String, contents: String) -> UnsavedFile { + let name = CString::new(name).unwrap(); + let contents = CString::new(contents).unwrap(); + let x = CXUnsavedFile { + Filename: name.as_ptr(), + Contents: contents.as_ptr(), + Length: contents.as_bytes().len() as c_ulong, + }; + UnsavedFile { x, name, contents } + } +} + +impl fmt::Debug for UnsavedFile { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!( + fmt, + "UnsavedFile(name: {:?}, contents: {:?})", + self.name, self.contents + ) + } +} + +/// Convert a cursor kind into a static string. +pub(crate) fn kind_to_str(x: CXCursorKind) -> String { + unsafe { cxstring_into_string(clang_getCursorKindSpelling(x)) } +} + +/// Convert a type kind to a static string. +pub(crate) fn type_to_str(x: CXTypeKind) -> String { + unsafe { cxstring_into_string(clang_getTypeKindSpelling(x)) } +} + +/// Dump the Clang AST to stdout for debugging purposes. 
+pub(crate) fn ast_dump(c: &Cursor, depth: isize) -> CXChildVisitResult { + fn print_indent>(depth: isize, s: S) { + for _ in 0..depth { + print!(" "); + } + println!("{}", s.as_ref()); + } + + fn print_cursor>(depth: isize, prefix: S, c: &Cursor) { + let prefix = prefix.as_ref(); + print_indent( + depth, + format!(" {}kind = {}", prefix, kind_to_str(c.kind())), + ); + print_indent( + depth, + format!(" {}spelling = \"{}\"", prefix, c.spelling()), + ); + print_indent(depth, format!(" {}location = {}", prefix, c.location())); + print_indent( + depth, + format!(" {}is-definition? {}", prefix, c.is_definition()), + ); + print_indent( + depth, + format!(" {}is-declaration? {}", prefix, c.is_declaration()), + ); + print_indent( + depth, + format!( + " {}is-inlined-function? {}", + prefix, + c.is_inlined_function() + ), + ); + + let templ_kind = c.template_kind(); + if templ_kind != CXCursor_NoDeclFound { + print_indent( + depth, + format!( + " {}template-kind = {}", + prefix, + kind_to_str(templ_kind) + ), + ); + } + if let Some(usr) = c.usr() { + print_indent(depth, format!(" {}usr = \"{}\"", prefix, usr)); + } + if let Ok(num) = c.num_args() { + print_indent(depth, format!(" {}number-of-args = {}", prefix, num)); + } + if let Some(num) = c.num_template_args() { + print_indent( + depth, + format!(" {}number-of-template-args = {}", prefix, num), + ); + } + + if c.is_bit_field() { + let width = match c.bit_width() { + Some(w) => w.to_string(), + None => "".to_string(), + }; + print_indent(depth, format!(" {}bit-width = {}", prefix, width)); + } + + if let Some(ty) = c.enum_type() { + print_indent( + depth, + format!(" {}enum-type = {}", prefix, type_to_str(ty.kind())), + ); + } + if let Some(val) = c.enum_val_signed() { + print_indent(depth, format!(" {}enum-val = {}", prefix, val)); + } + if let Some(ty) = c.typedef_type() { + print_indent( + depth, + format!(" {}typedef-type = {}", prefix, type_to_str(ty.kind())), + ); + } + if let Some(ty) = c.ret_type() { + 
print_indent( + depth, + format!(" {}ret-type = {}", prefix, type_to_str(ty.kind())), + ); + } + + if let Some(refd) = c.referenced() { + if refd != *c { + println!(); + print_cursor( + depth, + String::from(prefix) + "referenced.", + &refd, + ); + } + } + + let canonical = c.canonical(); + if canonical != *c { + println!(); + print_cursor( + depth, + String::from(prefix) + "canonical.", + &canonical, + ); + } + + if let Some(specialized) = c.specialized() { + if specialized != *c { + println!(); + print_cursor( + depth, + String::from(prefix) + "specialized.", + &specialized, + ); + } + } + + if let Some(parent) = c.fallible_semantic_parent() { + println!(); + print_cursor( + depth, + String::from(prefix) + "semantic-parent.", + &parent, + ); + } + } + + fn print_type>(depth: isize, prefix: S, ty: &Type) { + let prefix = prefix.as_ref(); + + let kind = ty.kind(); + print_indent(depth, format!(" {}kind = {}", prefix, type_to_str(kind))); + if kind == CXType_Invalid { + return; + } + + print_indent(depth, format!(" {}cconv = {}", prefix, ty.call_conv())); + + print_indent( + depth, + format!(" {}spelling = \"{}\"", prefix, ty.spelling()), + ); + let num_template_args = + unsafe { clang_Type_getNumTemplateArguments(ty.x) }; + if num_template_args >= 0 { + print_indent( + depth, + format!( + " {}number-of-template-args = {}", + prefix, num_template_args + ), + ); + } + if let Some(num) = ty.num_elements() { + print_indent( + depth, + format!(" {}number-of-elements = {}", prefix, num), + ); + } + print_indent( + depth, + format!(" {}is-variadic? 
{}", prefix, ty.is_variadic()), + ); + + let canonical = ty.canonical_type(); + if canonical != *ty { + println!(); + print_type(depth, String::from(prefix) + "canonical.", &canonical); + } + + if let Some(pointee) = ty.pointee_type() { + if pointee != *ty { + println!(); + print_type(depth, String::from(prefix) + "pointee.", &pointee); + } + } + + if let Some(elem) = ty.elem_type() { + if elem != *ty { + println!(); + print_type(depth, String::from(prefix) + "elements.", &elem); + } + } + + if let Some(ret) = ty.ret_type() { + if ret != *ty { + println!(); + print_type(depth, String::from(prefix) + "return.", &ret); + } + } + + let named = ty.named(); + if named != *ty && named.is_valid() { + println!(); + print_type(depth, String::from(prefix) + "named.", &named); + } + } + + print_indent(depth, "("); + print_cursor(depth, "", c); + + println!(); + let ty = c.cur_type(); + print_type(depth, "type.", &ty); + + let declaration = ty.declaration(); + if declaration != *c && declaration.kind() != CXCursor_NoDeclFound { + println!(); + print_cursor(depth, "type.declaration.", &declaration); + } + + // Recurse. + let mut found_children = false; + c.visit(|s| { + if !found_children { + println!(); + found_children = true; + } + ast_dump(&s, depth + 1) + }); + + print_indent(depth, ")"); + + CXChildVisit_Continue +} + +/// Try to extract the clang version to a string +pub(crate) fn extract_clang_version() -> String { + unsafe { cxstring_into_string(clang_getClangVersion()) } +} + +/// A wrapper for the result of evaluating an expression. +#[derive(Debug)] +pub(crate) struct EvalResult { + x: CXEvalResult, + ty: Type, +} + +impl EvalResult { + /// Evaluate `cursor` and return the result. 
+ pub(crate) fn new(cursor: Cursor) -> Option { + // Work around https://bugs.llvm.org/show_bug.cgi?id=42532, see: + // * https://github.com/rust-lang/rust-bindgen/issues/283 + // * https://github.com/rust-lang/rust-bindgen/issues/1590 + { + let mut found_cant_eval = false; + cursor.visit(|c| { + if c.kind() == CXCursor_TypeRef && + c.cur_type().canonical_type().kind() == CXType_Unexposed + { + found_cant_eval = true; + return CXChildVisit_Break; + } + + CXChildVisit_Recurse + }); + + if found_cant_eval { + return None; + } + } + Some(EvalResult { + x: unsafe { clang_Cursor_Evaluate(cursor.x) }, + ty: cursor.cur_type().canonical_type(), + }) + } + + fn kind(&self) -> CXEvalResultKind { + unsafe { clang_EvalResult_getKind(self.x) } + } + + /// Try to get back the result as a double. + pub(crate) fn as_double(&self) -> Option { + match self.kind() { + CXEval_Float => { + Some(unsafe { clang_EvalResult_getAsDouble(self.x) }) + } + _ => None, + } + } + + /// Try to get back the result as an integer. + pub(crate) fn as_int(&self) -> Option { + if self.kind() != CXEval_Int { + return None; + } + + if unsafe { clang_EvalResult_isUnsignedInt(self.x) } != 0 { + let value = unsafe { clang_EvalResult_getAsUnsigned(self.x) }; + if value > i64::max_value() as c_ulonglong { + return None; + } + + return Some(value as i64); + } + + let value = unsafe { clang_EvalResult_getAsLongLong(self.x) }; + if value > i64::max_value() as c_longlong { + return None; + } + if value < i64::min_value() as c_longlong { + return None; + } + #[allow(clippy::unnecessary_cast)] + Some(value as i64) + } + + /// Evaluates the expression as a literal string, that may or may not be + /// valid utf-8. 
+ pub(crate) fn as_literal_string(&self) -> Option> { + if self.kind() != CXEval_StrLiteral { + return None; + } + + let char_ty = self.ty.pointee_type().or_else(|| self.ty.elem_type())?; + match char_ty.kind() { + CXType_Char_S | CXType_SChar | CXType_Char_U | CXType_UChar => { + let ret = unsafe { + CStr::from_ptr(clang_EvalResult_getAsStr(self.x)) + }; + Some(ret.to_bytes().to_vec()) + } + // FIXME: Support generating these. + CXType_Char16 => None, + CXType_Char32 => None, + CXType_WChar => None, + _ => None, + } + } +} + +impl Drop for EvalResult { + fn drop(&mut self) { + unsafe { clang_EvalResult_dispose(self.x) }; + } +} + +/// Target information obtained from libclang. +#[derive(Debug)] +pub(crate) struct TargetInfo { + /// The target triple. + pub(crate) triple: String, + /// The width of the pointer _in bits_. + pub(crate) pointer_width: usize, +} + +impl TargetInfo { + /// Tries to obtain target information from libclang. + pub(crate) fn new(tu: &TranslationUnit) -> Self { + let triple; + let pointer_width; + unsafe { + let ti = clang_getTranslationUnitTargetInfo(tu.x); + triple = cxstring_into_string(clang_TargetInfo_getTriple(ti)); + pointer_width = clang_TargetInfo_getPointerWidth(ti); + clang_TargetInfo_dispose(ti); + } + assert!(pointer_width > 0); + assert_eq!(pointer_width % 8, 0); + TargetInfo { + triple, + pointer_width: pointer_width as usize, + } + } +} diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/bitfield_unit.rs clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/bitfield_unit.rs --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/bitfield_unit.rs 1970-01-01 00:00:00.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/bitfield_unit.rs 2023-08-15 22:24:19.000000000 +0000 @@ -0,0 +1,102 @@ +#[repr(C)] +#[derive(Copy, Clone, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct __BindgenBitfieldUnit { + storage: Storage, +} + +impl 
__BindgenBitfieldUnit { + #[inline] + pub const fn new(storage: Storage) -> Self { + Self { storage } + } +} + +impl __BindgenBitfieldUnit +where + Storage: AsRef<[u8]> + AsMut<[u8]>, +{ + #[inline] + pub fn get_bit(&self, index: usize) -> bool { + debug_assert!(index / 8 < self.storage.as_ref().len()); + + let byte_index = index / 8; + let byte = self.storage.as_ref()[byte_index]; + + let bit_index = if cfg!(target_endian = "big") { + 7 - (index % 8) + } else { + index % 8 + }; + + let mask = 1 << bit_index; + + byte & mask == mask + } + + #[inline] + pub fn set_bit(&mut self, index: usize, val: bool) { + debug_assert!(index / 8 < self.storage.as_ref().len()); + + let byte_index = index / 8; + let byte = &mut self.storage.as_mut()[byte_index]; + + let bit_index = if cfg!(target_endian = "big") { + 7 - (index % 8) + } else { + index % 8 + }; + + let mask = 1 << bit_index; + if val { + *byte |= mask; + } else { + *byte &= !mask; + } + } + + #[inline] + pub fn get(&self, bit_offset: usize, bit_width: u8) -> u64 { + debug_assert!(bit_width <= 64); + debug_assert!(bit_offset / 8 < self.storage.as_ref().len()); + debug_assert!( + (bit_offset + (bit_width as usize)) / 8 <= + self.storage.as_ref().len() + ); + + let mut val = 0; + + for i in 0..(bit_width as usize) { + if self.get_bit(i + bit_offset) { + let index = if cfg!(target_endian = "big") { + bit_width as usize - 1 - i + } else { + i + }; + val |= 1 << index; + } + } + + val + } + + #[inline] + pub fn set(&mut self, bit_offset: usize, bit_width: u8, val: u64) { + debug_assert!(bit_width <= 64); + debug_assert!(bit_offset / 8 < self.storage.as_ref().len()); + debug_assert!( + (bit_offset + (bit_width as usize)) / 8 <= + self.storage.as_ref().len() + ); + + for i in 0..(bit_width as usize) { + let mask = 1 << i; + let val_bit_is_set = val & mask == mask; + let index = if cfg!(target_endian = "big") { + bit_width as usize - 1 - i + } else { + i + }; + self.set_bit(index + bit_offset, val_bit_is_set); + } + } +} diff 
-Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/bitfield_unit_tests.rs clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/bitfield_unit_tests.rs --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/bitfield_unit_tests.rs 1970-01-01 00:00:00.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/bitfield_unit_tests.rs 2023-08-15 22:24:19.000000000 +0000 @@ -0,0 +1,260 @@ +//! Tests for `__BindgenBitfieldUnit`. +//! +//! Note that bit-fields are allocated right to left (least to most significant +//! bits). +//! +//! From the x86 PS ABI: +//! +//! ```c +//! struct { +//! int j : 5; +//! int k : 6; +//! int m : 7; +//! }; +//! ``` +//! +//! ```ignore +//! +------------------------------------------------------------+ +//! | | | | | +//! | padding | m | k | j | +//! |31 18|17 11|10 5|4 0| +//! +------------------------------------------------------------+ +//! ``` + +use super::bitfield_unit::__BindgenBitfieldUnit; + +#[test] +fn bitfield_unit_get_bit() { + let unit = __BindgenBitfieldUnit::<[u8; 2]>::new([0b10011101, 0b00011101]); + + let mut bits = vec![]; + for i in 0..16 { + bits.push(unit.get_bit(i)); + } + + println!(); + println!("bits = {:?}", bits); + assert_eq!( + bits, + &[ + // 0b10011101 + true, false, true, true, true, false, false, true, + // 0b00011101 + true, false, true, true, true, false, false, false + ] + ); +} + +#[test] +fn bitfield_unit_set_bit() { + let mut unit = + __BindgenBitfieldUnit::<[u8; 2]>::new([0b00000000, 0b00000000]); + + for i in 0..16 { + if i % 3 == 0 { + unit.set_bit(i, true); + } + } + + for i in 0..16 { + assert_eq!(unit.get_bit(i), i % 3 == 0); + } + + let mut unit = + __BindgenBitfieldUnit::<[u8; 2]>::new([0b11111111, 0b11111111]); + + for i in 0..16 { + if i % 3 == 0 { + unit.set_bit(i, false); + } + } + + for i in 0..16 { + assert_eq!(unit.get_bit(i), i % 3 != 0); + } +} + +macro_rules! 
bitfield_unit_get { + ( + $( + With $storage:expr , then get($start:expr, $len:expr) is $expected:expr; + )* + ) => { + #[test] + fn bitfield_unit_get() { + $({ + let expected = $expected; + let unit = __BindgenBitfieldUnit::<_>::new($storage); + let actual = unit.get($start, $len); + + println!(); + println!("expected = {:064b}", expected); + println!("actual = {:064b}", actual); + + assert_eq!(expected, actual); + })* + } + } +} + +bitfield_unit_get! { + // Let's just exhaustively test getting the bits from a single byte, since + // there are few enough combinations... + + With [0b11100010], then get(0, 1) is 0; + With [0b11100010], then get(1, 1) is 1; + With [0b11100010], then get(2, 1) is 0; + With [0b11100010], then get(3, 1) is 0; + With [0b11100010], then get(4, 1) is 0; + With [0b11100010], then get(5, 1) is 1; + With [0b11100010], then get(6, 1) is 1; + With [0b11100010], then get(7, 1) is 1; + + With [0b11100010], then get(0, 2) is 0b10; + With [0b11100010], then get(1, 2) is 0b01; + With [0b11100010], then get(2, 2) is 0b00; + With [0b11100010], then get(3, 2) is 0b00; + With [0b11100010], then get(4, 2) is 0b10; + With [0b11100010], then get(5, 2) is 0b11; + With [0b11100010], then get(6, 2) is 0b11; + + With [0b11100010], then get(0, 3) is 0b010; + With [0b11100010], then get(1, 3) is 0b001; + With [0b11100010], then get(2, 3) is 0b000; + With [0b11100010], then get(3, 3) is 0b100; + With [0b11100010], then get(4, 3) is 0b110; + With [0b11100010], then get(5, 3) is 0b111; + + With [0b11100010], then get(0, 4) is 0b0010; + With [0b11100010], then get(1, 4) is 0b0001; + With [0b11100010], then get(2, 4) is 0b1000; + With [0b11100010], then get(3, 4) is 0b1100; + With [0b11100010], then get(4, 4) is 0b1110; + + With [0b11100010], then get(0, 5) is 0b00010; + With [0b11100010], then get(1, 5) is 0b10001; + With [0b11100010], then get(2, 5) is 0b11000; + With [0b11100010], then get(3, 5) is 0b11100; + + With [0b11100010], then get(0, 6) is 0b100010; + With 
[0b11100010], then get(1, 6) is 0b110001; + With [0b11100010], then get(2, 6) is 0b111000; + + With [0b11100010], then get(0, 7) is 0b1100010; + With [0b11100010], then get(1, 7) is 0b1110001; + + With [0b11100010], then get(0, 8) is 0b11100010; + + // OK. Now let's test getting bits from across byte boundaries. + + With [0b01010101, 0b11111111, 0b00000000, 0b11111111], + then get(0, 16) is 0b1111111101010101; + + With [0b01010101, 0b11111111, 0b00000000, 0b11111111], + then get(1, 16) is 0b0111111110101010; + + With [0b01010101, 0b11111111, 0b00000000, 0b11111111], + then get(2, 16) is 0b0011111111010101; + + With [0b01010101, 0b11111111, 0b00000000, 0b11111111], + then get(3, 16) is 0b0001111111101010; + + With [0b01010101, 0b11111111, 0b00000000, 0b11111111], + then get(4, 16) is 0b0000111111110101; + + With [0b01010101, 0b11111111, 0b00000000, 0b11111111], + then get(5, 16) is 0b0000011111111010; + + With [0b01010101, 0b11111111, 0b00000000, 0b11111111], + then get(6, 16) is 0b0000001111111101; + + With [0b01010101, 0b11111111, 0b00000000, 0b11111111], + then get(7, 16) is 0b0000000111111110; + + With [0b01010101, 0b11111111, 0b00000000, 0b11111111], + then get(8, 16) is 0b0000000011111111; +} + +macro_rules! bitfield_unit_set { + ( + $( + set($start:expr, $len:expr, $val:expr) is $expected:expr; + )* + ) => { + #[test] + fn bitfield_unit_set() { + $( + let mut unit = __BindgenBitfieldUnit::<[u8; 4]>::new([0, 0, 0, 0]); + unit.set($start, $len, $val); + let actual = unit.get(0, 32); + + println!(); + println!("set({}, {}, {:032b}", $start, $len, $val); + println!("expected = {:064b}", $expected); + println!("actual = {:064b}", actual); + + assert_eq!($expected, actual); + )* + } + } +} + +bitfield_unit_set! { + // Once again, let's exhaustively test single byte combinations. 
+ + set(0, 1, 0b11111111) is 0b00000001; + set(1, 1, 0b11111111) is 0b00000010; + set(2, 1, 0b11111111) is 0b00000100; + set(3, 1, 0b11111111) is 0b00001000; + set(4, 1, 0b11111111) is 0b00010000; + set(5, 1, 0b11111111) is 0b00100000; + set(6, 1, 0b11111111) is 0b01000000; + set(7, 1, 0b11111111) is 0b10000000; + + set(0, 2, 0b11111111) is 0b00000011; + set(1, 2, 0b11111111) is 0b00000110; + set(2, 2, 0b11111111) is 0b00001100; + set(3, 2, 0b11111111) is 0b00011000; + set(4, 2, 0b11111111) is 0b00110000; + set(5, 2, 0b11111111) is 0b01100000; + set(6, 2, 0b11111111) is 0b11000000; + + set(0, 3, 0b11111111) is 0b00000111; + set(1, 3, 0b11111111) is 0b00001110; + set(2, 3, 0b11111111) is 0b00011100; + set(3, 3, 0b11111111) is 0b00111000; + set(4, 3, 0b11111111) is 0b01110000; + set(5, 3, 0b11111111) is 0b11100000; + + set(0, 4, 0b11111111) is 0b00001111; + set(1, 4, 0b11111111) is 0b00011110; + set(2, 4, 0b11111111) is 0b00111100; + set(3, 4, 0b11111111) is 0b01111000; + set(4, 4, 0b11111111) is 0b11110000; + + set(0, 5, 0b11111111) is 0b00011111; + set(1, 5, 0b11111111) is 0b00111110; + set(2, 5, 0b11111111) is 0b01111100; + set(3, 5, 0b11111111) is 0b11111000; + + set(0, 6, 0b11111111) is 0b00111111; + set(1, 6, 0b11111111) is 0b01111110; + set(2, 6, 0b11111111) is 0b11111100; + + set(0, 7, 0b11111111) is 0b01111111; + set(1, 7, 0b11111111) is 0b11111110; + + set(0, 8, 0b11111111) is 0b11111111; + + // And, now let's cross byte boundaries. 
+ + set(0, 16, 0b1111111111111111) is 0b00000000000000001111111111111111; + set(1, 16, 0b1111111111111111) is 0b00000000000000011111111111111110; + set(2, 16, 0b1111111111111111) is 0b00000000000000111111111111111100; + set(3, 16, 0b1111111111111111) is 0b00000000000001111111111111111000; + set(4, 16, 0b1111111111111111) is 0b00000000000011111111111111110000; + set(5, 16, 0b1111111111111111) is 0b00000000000111111111111111100000; + set(6, 16, 0b1111111111111111) is 0b00000000001111111111111111000000; + set(7, 16, 0b1111111111111111) is 0b00000000011111111111111110000000; + set(8, 16, 0b1111111111111111) is 0b00000000111111111111111100000000; +} diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/dyngen.rs clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/dyngen.rs --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/dyngen.rs 1970-01-01 00:00:00.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/dyngen.rs 2023-08-15 22:24:19.000000000 +0000 @@ -0,0 +1,201 @@ +use crate::codegen; +use crate::ir::context::BindgenContext; +use crate::ir::function::ClangAbi; +use proc_macro2::Ident; + +/// Used to build the output tokens for dynamic bindings. +#[derive(Default)] +pub(crate) struct DynamicItems { + /// Tracks the tokens that will appears inside the library struct -- e.g.: + /// ```ignore + /// struct Lib { + /// __library: ::libloading::Library, + /// pub x: Result, // <- tracks these + /// ... + /// } + /// ``` + struct_members: Vec, + + /// Tracks the tokens that will appear inside the library struct's implementation, e.g.: + /// + /// ```ignore + /// impl Lib { + /// ... + /// pub unsafe fn foo(&self, ...) { // <- tracks these + /// ... + /// } + /// } + /// ``` + struct_implementation: Vec, + + /// Tracks the initialization of the fields inside the `::new` constructor of the library + /// struct, e.g.: + /// ```ignore + /// impl Lib { + /// + /// pub unsafe fn new

(path: P) -> Result + /// where + /// P: AsRef<::std::ffi::OsStr>, + /// { + /// ... + /// let foo = __library.get(...) ...; // <- tracks these + /// ... + /// } + /// + /// ... + /// } + /// ``` + constructor_inits: Vec, + + /// Tracks the information that is passed to the library struct at the end of the `::new` + /// constructor, e.g.: + /// ```ignore + /// impl LibFoo { + /// pub unsafe fn new

(path: P) -> Result + /// where + /// P: AsRef<::std::ffi::OsStr>, + /// { + /// ... + /// Ok(LibFoo { + /// __library: __library, + /// foo, + /// bar, // <- tracks these + /// ... + /// }) + /// } + /// } + /// ``` + init_fields: Vec, +} + +impl DynamicItems { + pub(crate) fn new() -> Self { + Self::default() + } + + pub(crate) fn get_tokens( + &self, + lib_ident: Ident, + ctx: &BindgenContext, + ) -> proc_macro2::TokenStream { + let struct_members = &self.struct_members; + let constructor_inits = &self.constructor_inits; + let init_fields = &self.init_fields; + let struct_implementation = &self.struct_implementation; + + let from_library = if ctx.options().wrap_unsafe_ops { + quote!(unsafe { Self::from_library(library) }) + } else { + quote!(Self::from_library(library)) + }; + + quote! { + extern crate libloading; + + pub struct #lib_ident { + __library: ::libloading::Library, + #(#struct_members)* + } + + impl #lib_ident { + pub unsafe fn new

( + path: P + ) -> Result + where P: AsRef<::std::ffi::OsStr> { + let library = ::libloading::Library::new(path)?; + #from_library + } + + pub unsafe fn from_library( + library: L + ) -> Result + where L: Into<::libloading::Library> { + let __library = library.into(); + #( #constructor_inits )* + Ok(#lib_ident { + __library, + #( #init_fields ),* + }) + } + + #( #struct_implementation )* + } + } + } + + #[allow(clippy::too_many_arguments)] + pub(crate) fn push( + &mut self, + ident: Ident, + abi: ClangAbi, + is_variadic: bool, + is_required: bool, + args: Vec, + args_identifiers: Vec, + ret: proc_macro2::TokenStream, + ret_ty: proc_macro2::TokenStream, + attributes: Vec, + ctx: &BindgenContext, + ) { + if !is_variadic { + assert_eq!(args.len(), args_identifiers.len()); + } + + let signature = quote! { unsafe extern #abi fn ( #( #args),* ) #ret }; + let member = if is_required { + signature + } else { + quote! { Result<#signature, ::libloading::Error> } + }; + + self.struct_members.push(quote! { + pub #ident: #member, + }); + + // N.B: If the signature was required, it won't be wrapped in a Result<...> + // and we can simply call it directly. + let fn_ = if is_required { + quote! { self.#ident } + } else { + quote! { self.#ident.as_ref().expect("Expected function, got error.") } + }; + let call_body = if ctx.options().wrap_unsafe_ops { + quote!(unsafe { (#fn_)(#( #args_identifiers ),*) }) + } else { + quote!((#fn_)(#( #args_identifiers ),*) ) + }; + + // We can't implement variadic functions from C easily, so we allow to + // access the function pointer so that the user can call it just fine. + if !is_variadic { + self.struct_implementation.push(quote! { + #(#attributes)* + pub unsafe fn #ident ( &self, #( #args ),* ) #ret_ty { + #call_body + } + }); + } + + // N.B: Unwrap the signature upon construction if it is required to be resolved. 
+ let ident_str = codegen::helpers::ast_ty::cstr_expr(ident.to_string()); + let library_get = if ctx.options().wrap_unsafe_ops { + quote!(unsafe { __library.get(#ident_str) }) + } else { + quote!(__library.get(#ident_str)) + }; + + self.constructor_inits.push(if is_required { + quote! { + let #ident = #library_get.map(|sym| *sym)?; + } + } else { + quote! { + let #ident = #library_get.map(|sym| *sym); + } + }); + + self.init_fields.push(quote! { + #ident + }); + } +} diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/error.rs clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/error.rs --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/error.rs 1970-01-01 00:00:00.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/error.rs 2023-08-15 22:24:19.000000000 +0000 @@ -0,0 +1,33 @@ +use std::error; +use std::fmt; + +/// Errors that can occur during code generation. +#[derive(Clone, Debug, PartialEq, Eq)] +pub(crate) enum Error { + /// Tried to generate an opaque blob for a type that did not have a layout. + NoLayoutForOpaqueBlob, + + /// Tried to instantiate an opaque template definition, or a template + /// definition that is too difficult for us to understand (like a partial + /// template specialization). + InstantiationOfOpaqueType, +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str(match *self { + Error::NoLayoutForOpaqueBlob => { + "Tried to generate an opaque blob, but had no layout" + } + Error::InstantiationOfOpaqueType => { + "Instantiation of opaque template type or partial template \ + specialization" + } + }) + } +} + +impl error::Error for Error {} + +/// A `Result` of `T` or an error of `bindgen::codegen::error::Error`. 
+pub(crate) type Result = ::std::result::Result; diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/helpers.rs clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/helpers.rs --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/helpers.rs 1970-01-01 00:00:00.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/helpers.rs 2023-08-15 22:24:19.000000000 +0000 @@ -0,0 +1,322 @@ +//! Helpers for code generation that don't need macro expansion. + +use crate::ir::context::BindgenContext; +use crate::ir::layout::Layout; +use proc_macro2::{Ident, Span, TokenStream}; +use quote::TokenStreamExt; + +pub(crate) mod attributes { + use proc_macro2::{Ident, Span, TokenStream}; + use std::{borrow::Cow, str::FromStr}; + + pub(crate) fn repr(which: &str) -> TokenStream { + let which = Ident::new(which, Span::call_site()); + quote! { + #[repr( #which )] + } + } + + pub(crate) fn repr_list(which_ones: &[&str]) -> TokenStream { + let which_ones = which_ones + .iter() + .cloned() + .map(|one| TokenStream::from_str(one).expect("repr to be valid")); + quote! { + #[repr( #( #which_ones ),* )] + } + } + + pub(crate) fn derives(which_ones: &[&str]) -> TokenStream { + let which_ones = which_ones + .iter() + .cloned() + .map(|one| TokenStream::from_str(one).expect("derive to be valid")); + quote! { + #[derive( #( #which_ones ),* )] + } + } + + pub(crate) fn inline() -> TokenStream { + quote! { + #[inline] + } + } + + pub(crate) fn must_use() -> TokenStream { + quote! { + #[must_use] + } + } + + pub(crate) fn non_exhaustive() -> TokenStream { + quote! { + #[non_exhaustive] + } + } + + pub(crate) fn doc(comment: String) -> TokenStream { + if comment.is_empty() { + quote!() + } else { + quote!(#[doc = #comment]) + } + } + + pub(crate) fn link_name(name: &str) -> TokenStream { + // LLVM mangles the name by default but it's already mangled. + // Prefixing the name with \u{1} should tell LLVM to not mangle it. 
+ let name: Cow<'_, str> = if MANGLE { + name.into() + } else { + format!("\u{1}{}", name).into() + }; + + quote! { + #[link_name = #name] + } + } +} + +/// Generates a proper type for a field or type with a given `Layout`, that is, +/// a type with the correct size and alignment restrictions. +pub(crate) fn blob(ctx: &BindgenContext, layout: Layout) -> TokenStream { + let opaque = layout.opaque(); + + // FIXME(emilio, #412): We fall back to byte alignment, but there are + // some things that legitimately are more than 8-byte aligned. + // + // Eventually we should be able to `unwrap` here, but... + let ty_name = match opaque.known_rust_type_for_array(ctx) { + Some(ty) => ty, + None => { + warn!("Found unknown alignment on code generation!"); + "u8" + } + }; + + let ty_name = Ident::new(ty_name, Span::call_site()); + + let data_len = opaque.array_size(ctx).unwrap_or(layout.size); + + if data_len == 1 { + quote! { + #ty_name + } + } else { + quote! { + [ #ty_name ; #data_len ] + } + } +} + +/// Integer type of the same size as the given `Layout`. +pub(crate) fn integer_type( + ctx: &BindgenContext, + layout: Layout, +) -> Option { + let name = Layout::known_type_for_size(ctx, layout.size)?; + let name = Ident::new(name, Span::call_site()); + Some(quote! { #name }) +} + +/// Generates a bitfield allocation unit type for a type with the given `Layout`. +pub(crate) fn bitfield_unit( + ctx: &BindgenContext, + layout: Layout, +) -> TokenStream { + let mut tokens = quote! {}; + + if ctx.options().enable_cxx_namespaces { + tokens.append_all(quote! { root:: }); + } + + let size = layout.size; + tokens.append_all(quote! 
{ + __BindgenBitfieldUnit<[u8; #size]> + }); + + tokens +} + +pub(crate) mod ast_ty { + use crate::ir::context::BindgenContext; + use crate::ir::function::FunctionSig; + use crate::ir::layout::Layout; + use crate::ir::ty::FloatKind; + use proc_macro2::{self, TokenStream}; + use std::str::FromStr; + + pub(crate) fn c_void(ctx: &BindgenContext) -> TokenStream { + // ctypes_prefix takes precedence + match ctx.options().ctypes_prefix { + Some(ref prefix) => { + let prefix = TokenStream::from_str(prefix.as_str()).unwrap(); + quote! { + #prefix::c_void + } + } + None => { + if ctx.options().use_core && + ctx.options().rust_features.core_ffi_c_void + { + quote! { ::core::ffi::c_void } + } else { + quote! { ::std::os::raw::c_void } + } + } + } + } + + pub(crate) fn raw_type(ctx: &BindgenContext, name: &str) -> TokenStream { + let ident = ctx.rust_ident_raw(name); + match ctx.options().ctypes_prefix { + Some(ref prefix) => { + let prefix = TokenStream::from_str(prefix.as_str()).unwrap(); + quote! { + #prefix::#ident + } + } + None => { + if ctx.options().use_core && + ctx.options().rust_features().core_ffi_c + { + quote! { + ::core::ffi::#ident + } + } else { + quote! { + ::std::os::raw::#ident + } + } + } + } + } + + pub(crate) fn float_kind_rust_type( + ctx: &BindgenContext, + fk: FloatKind, + layout: Option, + ) -> TokenStream { + // TODO: we probably should take the type layout into account more + // often? + // + // Also, maybe this one shouldn't be the default? + match (fk, ctx.options().convert_floats) { + (FloatKind::Float, true) => quote! { f32 }, + (FloatKind::Double, true) => quote! { f64 }, + (FloatKind::Float, false) => raw_type(ctx, "c_float"), + (FloatKind::Double, false) => raw_type(ctx, "c_double"), + (FloatKind::LongDouble, _) => { + match layout { + Some(layout) => { + match layout.size { + 4 => quote! { f32 }, + 8 => quote! { f64 }, + // TODO(emilio): If rust ever gains f128 we should + // use it here and below. 
+ _ => super::integer_type(ctx, layout) + .unwrap_or(quote! { f64 }), + } + } + None => { + debug_assert!( + false, + "How didn't we know the layout for a primitive type?" + ); + quote! { f64 } + } + } + } + (FloatKind::Float128, _) => { + if ctx.options().rust_features.i128_and_u128 { + quote! { u128 } + } else { + quote! { [u64; 2] } + } + } + } + } + + pub(crate) fn int_expr(val: i64) -> TokenStream { + // Don't use quote! { #val } because that adds the type suffix. + let val = proc_macro2::Literal::i64_unsuffixed(val); + quote!(#val) + } + + pub(crate) fn uint_expr(val: u64) -> TokenStream { + // Don't use quote! { #val } because that adds the type suffix. + let val = proc_macro2::Literal::u64_unsuffixed(val); + quote!(#val) + } + + pub(crate) fn byte_array_expr(bytes: &[u8]) -> TokenStream { + let mut bytes: Vec<_> = bytes.to_vec(); + bytes.push(0); + quote! { [ #(#bytes),* ] } + } + + pub(crate) fn cstr_expr(mut string: String) -> TokenStream { + string.push('\0'); + let b = proc_macro2::Literal::byte_string(string.as_bytes()); + quote! { + #b + } + } + + pub(crate) fn float_expr( + ctx: &BindgenContext, + f: f64, + ) -> Result { + if f.is_finite() { + let val = proc_macro2::Literal::f64_unsuffixed(f); + + return Ok(quote!(#val)); + } + + let prefix = ctx.trait_prefix(); + + if f.is_nan() { + return Ok(quote! { + ::#prefix::f64::NAN + }); + } + + if f.is_infinite() { + return Ok(if f.is_sign_positive() { + quote! { + ::#prefix::f64::INFINITY + } + } else { + quote! { + ::#prefix::f64::NEG_INFINITY + } + }); + } + + warn!("Unknown non-finite float number: {:?}", f); + Err(()) + } + + pub(crate) fn arguments_from_signature( + signature: &FunctionSig, + ctx: &BindgenContext, + ) -> Vec { + let mut unnamed_arguments = 0; + signature + .argument_types() + .iter() + .map(|&(ref name, _ty)| match *name { + Some(ref name) => { + let name = ctx.rust_ident(name); + quote! 
{ #name } + } + None => { + unnamed_arguments += 1; + let name = + ctx.rust_ident(format!("arg{}", unnamed_arguments)); + quote! { #name } + } + }) + .collect() + } +} diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/impl_debug.rs clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/impl_debug.rs --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/impl_debug.rs 1970-01-01 00:00:00.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/impl_debug.rs 2023-08-15 22:24:19.000000000 +0000 @@ -0,0 +1,245 @@ +use crate::ir::comp::{BitfieldUnit, CompKind, Field, FieldData, FieldMethods}; +use crate::ir::context::BindgenContext; +use crate::ir::item::{HasTypeParamInArray, IsOpaque, Item, ItemCanonicalName}; +use crate::ir::ty::{TypeKind, RUST_DERIVE_IN_ARRAY_LIMIT}; + +pub(crate) fn gen_debug_impl( + ctx: &BindgenContext, + fields: &[Field], + item: &Item, + kind: CompKind, +) -> proc_macro2::TokenStream { + let struct_name = item.canonical_name(ctx); + let mut format_string = format!("{} {{{{ ", struct_name); + let mut tokens = vec![]; + + if item.is_opaque(ctx, &()) { + format_string.push_str("opaque"); + } else { + match kind { + CompKind::Union => { + format_string.push_str("union"); + } + CompKind::Struct => { + let processed_fields = fields.iter().filter_map(|f| match f { + Field::DataMember(ref fd) => fd.impl_debug(ctx, ()), + Field::Bitfields(ref bu) => bu.impl_debug(ctx, ()), + }); + + for (i, (fstring, toks)) in processed_fields.enumerate() { + if i > 0 { + format_string.push_str(", "); + } + tokens.extend(toks); + format_string.push_str(&fstring); + } + } + } + } + + format_string.push_str(" }}"); + tokens.insert(0, quote! { #format_string }); + + let prefix = ctx.trait_prefix(); + + quote! 
{ + fn fmt(&self, f: &mut ::#prefix::fmt::Formatter<'_>) -> ::#prefix ::fmt::Result { + write!(f, #( #tokens ),*) + } + } +} + +/// A trait for the things which we can codegen tokens that contribute towards a +/// generated `impl Debug`. +pub(crate) trait ImplDebug<'a> { + /// Any extra parameter required by this a particular `ImplDebug` implementation. + type Extra; + + /// Generate a format string snippet to be included in the larger `impl Debug` + /// format string, and the code to get the format string's interpolation values. + fn impl_debug( + &self, + ctx: &BindgenContext, + extra: Self::Extra, + ) -> Option<(String, Vec)>; +} + +impl<'a> ImplDebug<'a> for FieldData { + type Extra = (); + + fn impl_debug( + &self, + ctx: &BindgenContext, + _: Self::Extra, + ) -> Option<(String, Vec)> { + if let Some(name) = self.name() { + ctx.resolve_item(self.ty()).impl_debug(ctx, name) + } else { + None + } + } +} + +impl<'a> ImplDebug<'a> for BitfieldUnit { + type Extra = (); + + fn impl_debug( + &self, + ctx: &BindgenContext, + _: Self::Extra, + ) -> Option<(String, Vec)> { + let mut format_string = String::new(); + let mut tokens = vec![]; + for (i, bitfield) in self.bitfields().iter().enumerate() { + if i > 0 { + format_string.push_str(", "); + } + + if let Some(bitfield_name) = bitfield.name() { + format_string.push_str(&format!("{} : {{:?}}", bitfield_name)); + let getter_name = bitfield.getter_name(); + let name_ident = ctx.rust_ident_raw(getter_name); + tokens.push(quote! { + self.#name_ident () + }); + } + } + + Some((format_string, tokens)) + } +} + +impl<'a> ImplDebug<'a> for Item { + type Extra = &'a str; + + fn impl_debug( + &self, + ctx: &BindgenContext, + name: &str, + ) -> Option<(String, Vec)> { + let name_ident = ctx.rust_ident(name); + + // We don't know if blocklisted items `impl Debug` or not, so we can't + // add them to the format string we're building up. 
+ if !ctx.allowlisted_items().contains(&self.id()) { + return None; + } + + let ty = match self.as_type() { + Some(ty) => ty, + None => { + return None; + } + }; + + fn debug_print( + name: &str, + name_ident: proc_macro2::TokenStream, + ) -> Option<(String, Vec)> { + Some(( + format!("{}: {{:?}}", name), + vec![quote! { + self.#name_ident + }], + )) + } + + match *ty.kind() { + // Handle the simple cases. + TypeKind::Void | + TypeKind::NullPtr | + TypeKind::Int(..) | + TypeKind::Float(..) | + TypeKind::Complex(..) | + TypeKind::Function(..) | + TypeKind::Enum(..) | + TypeKind::Reference(..) | + TypeKind::UnresolvedTypeRef(..) | + TypeKind::ObjCInterface(..) | + TypeKind::ObjCId | + TypeKind::Comp(..) | + TypeKind::ObjCSel => debug_print(name, quote! { #name_ident }), + + TypeKind::TemplateInstantiation(ref inst) => { + if inst.is_opaque(ctx, self) { + Some((format!("{}: opaque", name), vec![])) + } else { + debug_print(name, quote! { #name_ident }) + } + } + + // The generic is not required to implement Debug, so we can not debug print that type + TypeKind::TypeParam => { + Some((format!("{}: Non-debuggable generic", name), vec![])) + } + + TypeKind::Array(_, len) => { + // Generics are not required to implement Debug + if self.has_type_param_in_array(ctx) { + Some(( + format!("{}: Array with length {}", name, len), + vec![], + )) + } else if len < RUST_DERIVE_IN_ARRAY_LIMIT || + ctx.options().rust_features().larger_arrays + { + // The simple case + debug_print(name, quote! { #name_ident }) + } else if ctx.options().use_core { + // There is no String in core; reducing field visibility to avoid breaking + // no_std setups. + Some((format!("{}: [...]", name), vec![])) + } else { + // Let's implement our own print function + Some(( + format!("{}: [{{}}]", name), + vec![quote! 
{ + self.#name_ident + .iter() + .enumerate() + .map(|(i, v)| format!("{}{:?}", if i > 0 { ", " } else { "" }, v)) + .collect::() + }], + )) + } + } + TypeKind::Vector(_, len) => { + if ctx.options().use_core { + // There is no format! in core; reducing field visibility to avoid breaking + // no_std setups. + Some((format!("{}(...)", name), vec![])) + } else { + let self_ids = 0..len; + Some(( + format!("{}({{}})", name), + vec![quote! { + #(format!("{:?}", self.#self_ids)),* + }], + )) + } + } + + TypeKind::ResolvedTypeRef(t) | + TypeKind::TemplateAlias(t, _) | + TypeKind::Alias(t) | + TypeKind::BlockPointer(t) => { + // We follow the aliases + ctx.resolve_item(t).impl_debug(ctx, name) + } + + TypeKind::Pointer(inner) => { + let inner_type = ctx.resolve_type(inner).canonical_type(ctx); + match *inner_type.kind() { + TypeKind::Function(ref sig) + if !sig.function_pointers_can_derive() => + { + Some((format!("{}: FunctionPointer", name), vec![])) + } + _ => debug_print(name, quote! { #name_ident }), + } + } + + TypeKind::Opaque => None, + } + } +} diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/impl_partialeq.rs clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/impl_partialeq.rs --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/impl_partialeq.rs 1970-01-01 00:00:00.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/impl_partialeq.rs 2023-08-15 22:24:19.000000000 +0000 @@ -0,0 +1,142 @@ +use crate::ir::comp::{CompInfo, CompKind, Field, FieldMethods}; +use crate::ir::context::BindgenContext; +use crate::ir::item::{IsOpaque, Item}; +use crate::ir::ty::{TypeKind, RUST_DERIVE_IN_ARRAY_LIMIT}; + +/// Generate a manual implementation of `PartialEq` trait for the +/// specified compound type. 
+pub(crate) fn gen_partialeq_impl( + ctx: &BindgenContext, + comp_info: &CompInfo, + item: &Item, + ty_for_impl: &proc_macro2::TokenStream, +) -> Option { + let mut tokens = vec![]; + + if item.is_opaque(ctx, &()) { + tokens.push(quote! { + &self._bindgen_opaque_blob[..] == &other._bindgen_opaque_blob[..] + }); + } else if comp_info.kind() == CompKind::Union { + assert!(!ctx.options().untagged_union); + tokens.push(quote! { + &self.bindgen_union_field[..] == &other.bindgen_union_field[..] + }); + } else { + for base in comp_info.base_members().iter() { + if !base.requires_storage(ctx) { + continue; + } + + let ty_item = ctx.resolve_item(base.ty); + let field_name = &base.field_name; + + if ty_item.is_opaque(ctx, &()) { + let field_name = ctx.rust_ident(field_name); + tokens.push(quote! { + &self. #field_name [..] == &other. #field_name [..] + }); + } else { + tokens.push(gen_field(ctx, ty_item, field_name)); + } + } + + for field in comp_info.fields() { + match *field { + Field::DataMember(ref fd) => { + let ty_item = ctx.resolve_item(fd.ty()); + let name = fd.name().unwrap(); + tokens.push(gen_field(ctx, ty_item, name)); + } + Field::Bitfields(ref bu) => { + for bitfield in bu.bitfields() { + if bitfield.name().is_some() { + let getter_name = bitfield.getter_name(); + let name_ident = ctx.rust_ident_raw(getter_name); + tokens.push(quote! { + self.#name_ident () == other.#name_ident () + }); + } + } + } + } + } + } + + Some(quote! { + fn eq(&self, other: & #ty_for_impl) -> bool { + #( #tokens )&&* + } + }) +} + +fn gen_field( + ctx: &BindgenContext, + ty_item: &Item, + name: &str, +) -> proc_macro2::TokenStream { + fn quote_equals( + name_ident: proc_macro2::Ident, + ) -> proc_macro2::TokenStream { + quote! { self.#name_ident == other.#name_ident } + } + + let name_ident = ctx.rust_ident(name); + let ty = ty_item.expect_type(); + + match *ty.kind() { + TypeKind::Void | + TypeKind::NullPtr | + TypeKind::Int(..) | + TypeKind::Complex(..) | + TypeKind::Float(..) 
| + TypeKind::Enum(..) | + TypeKind::TypeParam | + TypeKind::UnresolvedTypeRef(..) | + TypeKind::Reference(..) | + TypeKind::ObjCInterface(..) | + TypeKind::ObjCId | + TypeKind::ObjCSel | + TypeKind::Comp(..) | + TypeKind::Pointer(_) | + TypeKind::Function(..) | + TypeKind::Opaque => quote_equals(name_ident), + + TypeKind::TemplateInstantiation(ref inst) => { + if inst.is_opaque(ctx, ty_item) { + quote! { + &self. #name_ident [..] == &other. #name_ident [..] + } + } else { + quote_equals(name_ident) + } + } + + TypeKind::Array(_, len) => { + if len <= RUST_DERIVE_IN_ARRAY_LIMIT || + ctx.options().rust_features().larger_arrays + { + quote_equals(name_ident) + } else { + quote! { + &self. #name_ident [..] == &other. #name_ident [..] + } + } + } + TypeKind::Vector(_, len) => { + let self_ids = 0..len; + let other_ids = 0..len; + quote! { + #(self.#self_ids == other.#other_ids &&)* true + } + } + + TypeKind::ResolvedTypeRef(t) | + TypeKind::TemplateAlias(t, _) | + TypeKind::Alias(t) | + TypeKind::BlockPointer(t) => { + let inner_item = ctx.resolve_item(t); + gen_field(ctx, inner_item, name) + } + } +} diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/mod.rs clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/mod.rs --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/mod.rs 1970-01-01 00:00:00.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/mod.rs 2023-08-15 22:24:19.000000000 +0000 @@ -0,0 +1,5366 @@ +mod dyngen; +mod error; +mod helpers; +mod impl_debug; +mod impl_partialeq; +mod postprocessing; +mod serialize; +pub(crate) mod struct_layout; + +#[cfg(test)] +#[allow(warnings)] +pub(crate) mod bitfield_unit; +#[cfg(all(test, target_endian = "little"))] +mod bitfield_unit_tests; + +use self::dyngen::DynamicItems; +use self::helpers::attributes; +use self::struct_layout::StructLayoutTracker; + +use super::BindgenOptions; + +use crate::callbacks::{DeriveInfo, TypeKind as DeriveTypeKind}; 
+use crate::ir::analysis::{HasVtable, Sizedness}; +use crate::ir::annotations::{ + Annotations, FieldAccessorKind, FieldVisibilityKind, +}; +use crate::ir::comp::{ + Bitfield, BitfieldUnit, CompInfo, CompKind, Field, FieldData, FieldMethods, + Method, MethodKind, +}; +use crate::ir::context::{BindgenContext, ItemId}; +use crate::ir::derive::{ + CanDerive, CanDeriveCopy, CanDeriveDebug, CanDeriveDefault, CanDeriveEq, + CanDeriveHash, CanDeriveOrd, CanDerivePartialEq, CanDerivePartialOrd, +}; +use crate::ir::dot; +use crate::ir::enum_ty::{Enum, EnumVariant, EnumVariantValue}; +use crate::ir::function::{ + Abi, ClangAbi, Function, FunctionKind, FunctionSig, Linkage, +}; +use crate::ir::int::IntKind; +use crate::ir::item::{IsOpaque, Item, ItemCanonicalName, ItemCanonicalPath}; +use crate::ir::item_kind::ItemKind; +use crate::ir::layout::Layout; +use crate::ir::module::Module; +use crate::ir::objc::{ObjCInterface, ObjCMethod}; +use crate::ir::template::{ + AsTemplateParam, TemplateInstantiation, TemplateParameters, +}; +use crate::ir::ty::{Type, TypeKind}; +use crate::ir::var::Var; + +use proc_macro2::{self, Ident, Span}; +use quote::TokenStreamExt; + +use crate::{Entry, HashMap, HashSet}; +use std::borrow::Cow; +use std::cell::Cell; +use std::collections::VecDeque; +use std::fmt::{self, Write}; +use std::ops; +use std::str::FromStr; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum CodegenError { + Serialize { msg: String, loc: String }, + Io(String), +} + +impl From for CodegenError { + fn from(err: std::io::Error) -> Self { + Self::Io(err.to_string()) + } +} + +impl fmt::Display for CodegenError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Serialize { msg, loc } => { + write!(f, "serialization error at {}: {}", loc, msg) + } + Self::Io(err) => err.fmt(f), + } + } +} + +// Name of type defined in constified enum module +pub(crate) static CONSTIFIED_ENUM_MODULE_REPR_NAME: &str = "Type"; + +fn top_level_path( + ctx: 
&BindgenContext, + item: &Item, +) -> Vec { + let mut path = vec![quote! { self }]; + + if ctx.options().enable_cxx_namespaces { + for _ in 0..item.codegen_depth(ctx) { + path.push(quote! { super }); + } + } + + path +} + +fn root_import( + ctx: &BindgenContext, + module: &Item, +) -> proc_macro2::TokenStream { + assert!(ctx.options().enable_cxx_namespaces, "Somebody messed it up"); + assert!(module.is_module()); + + let mut path = top_level_path(ctx, module); + + let root = ctx.root_module().canonical_name(ctx); + let root_ident = ctx.rust_ident(root); + path.push(quote! { #root_ident }); + + let mut tokens = quote! {}; + tokens.append_separated(path, quote!(::)); + + quote! { + #[allow(unused_imports)] + use #tokens ; + } +} + +bitflags! { + struct DerivableTraits: u16 { + const DEBUG = 1 << 0; + const DEFAULT = 1 << 1; + const COPY = 1 << 2; + const CLONE = 1 << 3; + const HASH = 1 << 4; + const PARTIAL_ORD = 1 << 5; + const ORD = 1 << 6; + const PARTIAL_EQ = 1 << 7; + const EQ = 1 << 8; + } +} + +fn derives_of_item( + item: &Item, + ctx: &BindgenContext, + packed: bool, +) -> DerivableTraits { + let mut derivable_traits = DerivableTraits::empty(); + + let all_template_params = item.all_template_params(ctx); + + if item.can_derive_copy(ctx) && !item.annotations().disallow_copy() { + derivable_traits |= DerivableTraits::COPY; + + if ctx.options().rust_features().builtin_clone_impls || + !all_template_params.is_empty() + { + // FIXME: This requires extra logic if you have a big array in a + // templated struct. The reason for this is that the magic: + // fn clone(&self) -> Self { *self } + // doesn't work for templates. + // + // It's not hard to fix though. + derivable_traits |= DerivableTraits::CLONE; + } + } else if packed { + // If the struct or union is packed, deriving from Copy is required for + // deriving from any other trait. 
+ return derivable_traits; + } + + if item.can_derive_debug(ctx) && !item.annotations().disallow_debug() { + derivable_traits |= DerivableTraits::DEBUG; + } + + if item.can_derive_default(ctx) && !item.annotations().disallow_default() { + derivable_traits |= DerivableTraits::DEFAULT; + } + + if item.can_derive_hash(ctx) { + derivable_traits |= DerivableTraits::HASH; + } + + if item.can_derive_partialord(ctx) { + derivable_traits |= DerivableTraits::PARTIAL_ORD; + } + + if item.can_derive_ord(ctx) { + derivable_traits |= DerivableTraits::ORD; + } + + if item.can_derive_partialeq(ctx) { + derivable_traits |= DerivableTraits::PARTIAL_EQ; + } + + if item.can_derive_eq(ctx) { + derivable_traits |= DerivableTraits::EQ; + } + + derivable_traits +} + +impl From for Vec<&'static str> { + fn from(derivable_traits: DerivableTraits) -> Vec<&'static str> { + [ + (DerivableTraits::DEBUG, "Debug"), + (DerivableTraits::DEFAULT, "Default"), + (DerivableTraits::COPY, "Copy"), + (DerivableTraits::CLONE, "Clone"), + (DerivableTraits::HASH, "Hash"), + (DerivableTraits::PARTIAL_ORD, "PartialOrd"), + (DerivableTraits::ORD, "Ord"), + (DerivableTraits::PARTIAL_EQ, "PartialEq"), + (DerivableTraits::EQ, "Eq"), + ] + .iter() + .filter_map(|&(flag, derive)| { + Some(derive).filter(|_| derivable_traits.contains(flag)) + }) + .collect() + } +} + +struct CodegenResult<'a> { + items: Vec, + dynamic_items: DynamicItems, + + /// A monotonic counter used to add stable unique ID's to stuff that doesn't + /// need to be referenced by anything. + codegen_id: &'a Cell, + + /// Whether a bindgen union has been generated at least once. + saw_bindgen_union: bool, + + /// Whether an incomplete array has been generated at least once. + saw_incomplete_array: bool, + + /// Whether Objective C types have been seen at least once. + saw_objc: bool, + + /// Whether Apple block types have been seen at least once. + saw_block: bool, + + /// Whether a bitfield allocation unit has been seen at least once. 
+ saw_bitfield_unit: bool, + + items_seen: HashSet, + /// The set of generated function/var names, needed because in C/C++ is + /// legal to do something like: + /// + /// ```c++ + /// extern "C" { + /// void foo(); + /// extern int bar; + /// } + /// + /// extern "C" { + /// void foo(); + /// extern int bar; + /// } + /// ``` + /// + /// Being these two different declarations. + functions_seen: HashSet, + vars_seen: HashSet, + + /// Used for making bindings to overloaded functions. Maps from a canonical + /// function name to the number of overloads we have already codegen'd for + /// that name. This lets us give each overload a unique suffix. + overload_counters: HashMap, + + items_to_serialize: Vec, +} + +impl<'a> CodegenResult<'a> { + fn new(codegen_id: &'a Cell) -> Self { + CodegenResult { + items: vec![], + dynamic_items: DynamicItems::new(), + saw_bindgen_union: false, + saw_incomplete_array: false, + saw_objc: false, + saw_block: false, + saw_bitfield_unit: false, + codegen_id, + items_seen: Default::default(), + functions_seen: Default::default(), + vars_seen: Default::default(), + overload_counters: Default::default(), + items_to_serialize: Default::default(), + } + } + + fn dynamic_items(&mut self) -> &mut DynamicItems { + &mut self.dynamic_items + } + + fn saw_bindgen_union(&mut self) { + self.saw_bindgen_union = true; + } + + fn saw_incomplete_array(&mut self) { + self.saw_incomplete_array = true; + } + + fn saw_objc(&mut self) { + self.saw_objc = true; + } + + fn saw_block(&mut self) { + self.saw_block = true; + } + + fn saw_bitfield_unit(&mut self) { + self.saw_bitfield_unit = true; + } + + fn seen>(&self, item: Id) -> bool { + self.items_seen.contains(&item.into()) + } + + fn set_seen>(&mut self, item: Id) { + self.items_seen.insert(item.into()); + } + + fn seen_function(&self, name: &str) -> bool { + self.functions_seen.contains(name) + } + + fn saw_function(&mut self, name: &str) { + self.functions_seen.insert(name.into()); + } + + /// Get the 
overload number for the given function name. Increments the + /// counter internally so the next time we ask for the overload for this + /// name, we get the incremented value, and so on. + fn overload_number(&mut self, name: &str) -> u32 { + let counter = self.overload_counters.entry(name.into()).or_insert(0); + let number = *counter; + *counter += 1; + number + } + + fn seen_var(&self, name: &str) -> bool { + self.vars_seen.contains(name) + } + + fn saw_var(&mut self, name: &str) { + self.vars_seen.insert(name.into()); + } + + fn inner(&mut self, cb: F) -> Vec + where + F: FnOnce(&mut Self), + { + let mut new = Self::new(self.codegen_id); + + cb(&mut new); + + self.saw_incomplete_array |= new.saw_incomplete_array; + self.saw_objc |= new.saw_objc; + self.saw_block |= new.saw_block; + self.saw_bitfield_unit |= new.saw_bitfield_unit; + self.saw_bindgen_union |= new.saw_bindgen_union; + + new.items + } +} + +impl<'a> ops::Deref for CodegenResult<'a> { + type Target = Vec; + + fn deref(&self) -> &Self::Target { + &self.items + } +} + +impl<'a> ops::DerefMut for CodegenResult<'a> { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.items + } +} + +/// A trait to convert a rust type into a pointer, optionally const, to the same +/// type. +trait ToPtr { + fn to_ptr(self, is_const: bool) -> proc_macro2::TokenStream; +} + +impl ToPtr for proc_macro2::TokenStream { + fn to_ptr(self, is_const: bool) -> proc_macro2::TokenStream { + if is_const { + quote! { *const #self } + } else { + quote! { *mut #self } + } + } +} + +/// An extension trait for `proc_macro2::TokenStream` that lets us append any implicit +/// template parameters that exist for some type, if necessary. 
+trait AppendImplicitTemplateParams { + fn append_implicit_template_params( + &mut self, + ctx: &BindgenContext, + item: &Item, + ); +} + +impl AppendImplicitTemplateParams for proc_macro2::TokenStream { + fn append_implicit_template_params( + &mut self, + ctx: &BindgenContext, + item: &Item, + ) { + let item = item.id().into_resolver().through_type_refs().resolve(ctx); + + match *item.expect_type().kind() { + TypeKind::UnresolvedTypeRef(..) => { + unreachable!("already resolved unresolved type refs") + } + TypeKind::ResolvedTypeRef(..) => { + unreachable!("we resolved item through type refs") + } + + // None of these types ever have implicit template parameters. + TypeKind::Void | + TypeKind::NullPtr | + TypeKind::Pointer(..) | + TypeKind::Reference(..) | + TypeKind::Int(..) | + TypeKind::Float(..) | + TypeKind::Complex(..) | + TypeKind::Array(..) | + TypeKind::TypeParam | + TypeKind::Opaque | + TypeKind::Function(..) | + TypeKind::Enum(..) | + TypeKind::ObjCId | + TypeKind::ObjCSel | + TypeKind::TemplateInstantiation(..) => return, + _ => {} + } + + let params: Vec<_> = item + .used_template_params(ctx) + .iter() + .map(|p| { + p.try_to_rust_ty(ctx, &()) + .expect("template params cannot fail to be a rust type") + }) + .collect(); + if !params.is_empty() { + self.append_all(quote! { + < #( #params ),* > + }); + } + } +} + +trait CodeGenerator { + /// Extra information from the caller. + type Extra; + + /// Extra information returned to the caller. 
+ type Return; + + fn codegen( + &self, + ctx: &BindgenContext, + result: &mut CodegenResult<'_>, + extra: &Self::Extra, + ) -> Self::Return; +} + +impl Item { + fn process_before_codegen( + &self, + ctx: &BindgenContext, + result: &mut CodegenResult, + ) -> bool { + if !self.is_enabled_for_codegen(ctx) { + return false; + } + + if self.is_blocklisted(ctx) || result.seen(self.id()) { + debug!( + "::process_before_codegen: Ignoring hidden or seen: \ + self = {:?}", + self + ); + return false; + } + + if !ctx.codegen_items().contains(&self.id()) { + // TODO(emilio, #453): Figure out what to do when this happens + // legitimately, we could track the opaque stuff and disable the + // assertion there I guess. + warn!("Found non-allowlisted item in code generation: {:?}", self); + } + + result.set_seen(self.id()); + true + } +} + +impl CodeGenerator for Item { + type Extra = (); + type Return = (); + + fn codegen( + &self, + ctx: &BindgenContext, + result: &mut CodegenResult<'_>, + _extra: &(), + ) { + debug!("::codegen: self = {:?}", self); + if !self.process_before_codegen(ctx, result) { + return; + } + + match *self.kind() { + ItemKind::Module(ref module) => { + module.codegen(ctx, result, self); + } + ItemKind::Function(ref fun) => { + fun.codegen(ctx, result, self); + } + ItemKind::Var(ref var) => { + var.codegen(ctx, result, self); + } + ItemKind::Type(ref ty) => { + ty.codegen(ctx, result, self); + } + } + } +} + +impl CodeGenerator for Module { + type Extra = Item; + type Return = (); + + fn codegen( + &self, + ctx: &BindgenContext, + result: &mut CodegenResult<'_>, + item: &Item, + ) { + debug!("::codegen: item = {:?}", item); + + let codegen_self = |result: &mut CodegenResult, + found_any: &mut bool| { + for child in self.children() { + if ctx.codegen_items().contains(child) { + *found_any = true; + ctx.resolve_item(*child).codegen(ctx, result, &()); + } + } + + if item.id() == ctx.root_module() { + if result.saw_block { + utils::prepend_block_header(ctx, &mut 
*result); + } + if result.saw_bindgen_union { + utils::prepend_union_types(ctx, &mut *result); + } + if result.saw_incomplete_array { + utils::prepend_incomplete_array_types(ctx, &mut *result); + } + if ctx.need_bindgen_complex_type() { + utils::prepend_complex_type(&mut *result); + } + if result.saw_objc { + utils::prepend_objc_header(ctx, &mut *result); + } + if result.saw_bitfield_unit { + utils::prepend_bitfield_unit_type(ctx, &mut *result); + } + } + }; + + if !ctx.options().enable_cxx_namespaces || + (self.is_inline() && + !ctx.options().conservative_inline_namespaces) + { + codegen_self(result, &mut false); + return; + } + + let mut found_any = false; + let inner_items = result.inner(|result| { + result.push(root_import(ctx, item)); + + let path = item.namespace_aware_canonical_path(ctx).join("::"); + if let Some(raw_lines) = ctx.options().module_lines.get(&path) { + for raw_line in raw_lines { + found_any = true; + result.push( + proc_macro2::TokenStream::from_str(raw_line).unwrap(), + ); + } + } + + codegen_self(result, &mut found_any); + }); + + // Don't bother creating an empty module. + if !found_any { + return; + } + + let name = item.canonical_name(ctx); + let ident = ctx.rust_ident(name); + result.push(if item.id() == ctx.root_module() { + quote! { + #[allow(non_snake_case, non_camel_case_types, non_upper_case_globals)] + pub mod #ident { + #( #inner_items )* + } + } + } else { + quote! 
{ + pub mod #ident { + #( #inner_items )* + } + } + }); + } +} + +impl CodeGenerator for Var { + type Extra = Item; + type Return = (); + + fn codegen( + &self, + ctx: &BindgenContext, + result: &mut CodegenResult<'_>, + item: &Item, + ) { + use crate::ir::var::VarType; + debug!("::codegen: item = {:?}", item); + debug_assert!(item.is_enabled_for_codegen(ctx)); + + let canonical_name = item.canonical_name(ctx); + + if result.seen_var(&canonical_name) { + return; + } + result.saw_var(&canonical_name); + + let canonical_ident = ctx.rust_ident(&canonical_name); + + // We can't generate bindings to static variables of templates. The + // number of actual variables for a single declaration are open ended + // and we don't know what instantiations do or don't exist. + if !item.all_template_params(ctx).is_empty() { + return; + } + + let mut attrs = vec![]; + if let Some(comment) = item.comment(ctx) { + attrs.push(attributes::doc(comment)); + } + + let ty = self.ty().to_rust_ty_or_opaque(ctx, &()); + + if let Some(val) = self.val() { + match *val { + VarType::Bool(val) => { + result.push(quote! { + #(#attrs)* + pub const #canonical_ident : #ty = #val ; + }); + } + VarType::Int(val) => { + let int_kind = self + .ty() + .into_resolver() + .through_type_aliases() + .through_type_refs() + .resolve(ctx) + .expect_type() + .as_integer() + .unwrap(); + let val = if int_kind.is_signed() { + helpers::ast_ty::int_expr(val) + } else { + helpers::ast_ty::uint_expr(val as _) + }; + result.push(quote! { + #(#attrs)* + pub const #canonical_ident : #ty = #val ; + }); + } + VarType::String(ref bytes) => { + // Account the trailing zero. + // + // TODO: Here we ignore the type we just made up, probably + // we should refactor how the variable type and ty ID work. + let len = bytes.len() + 1; + let ty = quote! 
{ + [u8; #len] + }; + + match String::from_utf8(bytes.clone()) { + Ok(string) => { + let cstr = helpers::ast_ty::cstr_expr(string); + if ctx + .options() + .rust_features + .static_lifetime_elision + { + result.push(quote! { + #(#attrs)* + pub const #canonical_ident : &#ty = #cstr ; + }); + } else { + result.push(quote! { + #(#attrs)* + pub const #canonical_ident : &'static #ty = #cstr ; + }); + } + } + Err(..) => { + let bytes = helpers::ast_ty::byte_array_expr(bytes); + result.push(quote! { + #(#attrs)* + pub const #canonical_ident : #ty = #bytes ; + }); + } + } + } + VarType::Float(f) => { + if let Ok(expr) = helpers::ast_ty::float_expr(ctx, f) { + result.push(quote! { + #(#attrs)* + pub const #canonical_ident : #ty = #expr ; + }); + } + } + VarType::Char(c) => { + result.push(quote! { + #(#attrs)* + pub const #canonical_ident : #ty = #c ; + }); + } + } + } else { + // If necessary, apply a `#[link_name]` attribute + if let Some(link_name) = self.link_name() { + attrs.push(attributes::link_name::(link_name)); + } else { + let link_name = + self.mangled_name().unwrap_or_else(|| self.name()); + if !utils::names_will_be_identical_after_mangling( + &canonical_name, + link_name, + None, + ) { + attrs.push(attributes::link_name::(link_name)); + } + } + + let maybe_mut = if self.is_const() { + quote! {} + } else { + quote! { mut } + }; + + let tokens = quote!( + extern "C" { + #(#attrs)* + pub static #maybe_mut #canonical_ident: #ty; + } + ); + + result.push(tokens); + } + } +} + +impl CodeGenerator for Type { + type Extra = Item; + type Return = (); + + fn codegen( + &self, + ctx: &BindgenContext, + result: &mut CodegenResult<'_>, + item: &Item, + ) { + debug!("::codegen: item = {:?}", item); + debug_assert!(item.is_enabled_for_codegen(ctx)); + + match *self.kind() { + TypeKind::Void | + TypeKind::NullPtr | + TypeKind::Int(..) | + TypeKind::Float(..) | + TypeKind::Complex(..) | + TypeKind::Array(..) | + TypeKind::Vector(..) | + TypeKind::Pointer(..) 
| + TypeKind::Reference(..) | + TypeKind::Function(..) | + TypeKind::ResolvedTypeRef(..) | + TypeKind::Opaque | + TypeKind::TypeParam => { + // These items don't need code generation, they only need to be + // converted to rust types in fields, arguments, and such. + // NOTE(emilio): If you add to this list, make sure to also add + // it to BindgenContext::compute_allowlisted_and_codegen_items. + } + TypeKind::TemplateInstantiation(ref inst) => { + inst.codegen(ctx, result, item) + } + TypeKind::BlockPointer(inner) => { + if !ctx.options().generate_block { + return; + } + + let inner_item = + inner.into_resolver().through_type_refs().resolve(ctx); + let name = item.canonical_name(ctx); + + let inner_rust_type = { + if let TypeKind::Function(fnsig) = + inner_item.kind().expect_type().kind() + { + utils::fnsig_block(ctx, fnsig) + } else { + panic!("invalid block typedef: {:?}", inner_item) + } + }; + + let rust_name = ctx.rust_ident(name); + + let mut tokens = if let Some(comment) = item.comment(ctx) { + attributes::doc(comment) + } else { + quote! {} + }; + + tokens.append_all(quote! { + pub type #rust_name = #inner_rust_type ; + }); + + result.push(tokens); + result.saw_block(); + } + TypeKind::Comp(ref ci) => ci.codegen(ctx, result, item), + TypeKind::TemplateAlias(inner, _) | TypeKind::Alias(inner) => { + let inner_item = + inner.into_resolver().through_type_refs().resolve(ctx); + let name = item.canonical_name(ctx); + let path = item.canonical_path(ctx); + + { + let through_type_aliases = inner + .into_resolver() + .through_type_refs() + .through_type_aliases() + .resolve(ctx); + + // Try to catch the common pattern: + // + // typedef struct foo { ... } foo; + // + // here, and also other more complex cases like #946. + if through_type_aliases.canonical_path(ctx) == path { + return; + } + } + + // If this is a known named type, disallow generating anything + // for it too. 
If size_t -> usize conversions are enabled, we + // need to check that these conversions are permissible, but + // nothing needs to be generated, still. + let spelling = self.name().expect("Unnamed alias?"); + if utils::type_from_named(ctx, spelling).is_some() { + if let "size_t" | "ssize_t" = spelling { + let layout = inner_item + .kind() + .expect_type() + .layout(ctx) + .expect("No layout?"); + assert_eq!( + layout.size, + ctx.target_pointer_size(), + "Target platform requires `--no-size_t-is-usize`. The size of `{}` ({}) does not match the target pointer size ({})", + spelling, + layout.size, + ctx.target_pointer_size(), + ); + assert_eq!( + layout.align, + ctx.target_pointer_size(), + "Target platform requires `--no-size_t-is-usize`. The alignment of `{}` ({}) does not match the target pointer size ({})", + spelling, + layout.align, + ctx.target_pointer_size(), + ); + } + return; + } + + let mut outer_params = item.used_template_params(ctx); + + let is_opaque = item.is_opaque(ctx, &()); + let inner_rust_type = if is_opaque { + outer_params = vec![]; + self.to_opaque(ctx, item) + } else { + // Its possible that we have better layout information than + // the inner type does, so fall back to an opaque blob based + // on our layout if converting the inner item fails. + let mut inner_ty = inner_item + .try_to_rust_ty_or_opaque(ctx, &()) + .unwrap_or_else(|_| self.to_opaque(ctx, item)); + inner_ty.append_implicit_template_params(ctx, inner_item); + inner_ty + }; + + { + // FIXME(emilio): This is a workaround to avoid generating + // incorrect type aliases because of types that we haven't + // been able to resolve (because, eg, they depend on a + // template parameter). + // + // It's kind of a shame not generating them even when they + // could be referenced, but we already do the same for items + // with invalid template parameters, and at least this way + // they can be replaced, instead of generating plain invalid + // code. 
+ let inner_canon_type = + inner_item.expect_type().canonical_type(ctx); + if inner_canon_type.is_invalid_type_param() { + warn!( + "Item contained invalid named type, skipping: \ + {:?}, {:?}", + item, inner_item + ); + return; + } + } + + let rust_name = ctx.rust_ident(&name); + + let mut tokens = if let Some(comment) = item.comment(ctx) { + attributes::doc(comment) + } else { + quote! {} + }; + + let alias_style = if ctx.options().type_alias.matches(&name) { + AliasVariation::TypeAlias + } else if ctx.options().new_type_alias.matches(&name) { + AliasVariation::NewType + } else if ctx.options().new_type_alias_deref.matches(&name) { + AliasVariation::NewTypeDeref + } else { + ctx.options().default_alias_style + }; + + // We prefer using `pub use` over `pub type` because of: + // https://github.com/rust-lang/rust/issues/26264 + // These are the only characters allowed in simple + // paths, eg `good::dogs::Bront`. + if inner_rust_type.to_string().chars().all(|c| matches!(c, 'A'..='Z' | 'a'..='z' | '0'..='9' | ':' | '_' | ' ')) && outer_params.is_empty() && + !is_opaque && + alias_style == AliasVariation::TypeAlias && + inner_item.expect_type().canonical_type(ctx).is_enum() + { + tokens.append_all(quote! { + pub use + }); + let path = top_level_path(ctx, item); + tokens.append_separated(path, quote!(::)); + tokens.append_all(quote! { + :: #inner_rust_type as #rust_name ; + }); + result.push(tokens); + return; + } + + tokens.append_all(match alias_style { + AliasVariation::TypeAlias => quote! { + pub type #rust_name + }, + AliasVariation::NewType | AliasVariation::NewTypeDeref => { + assert!( + ctx.options().rust_features().repr_transparent, + "repr_transparent feature is required to use {:?}", + alias_style + ); + + let mut attributes = + vec![attributes::repr("transparent")]; + let packed = false; // Types can't be packed in Rust. 
+ let derivable_traits = + derives_of_item(item, ctx, packed); + if !derivable_traits.is_empty() { + let derives: Vec<_> = derivable_traits.into(); + attributes.push(attributes::derives(&derives)) + } + + quote! { + #( #attributes )* + pub struct #rust_name + } + } + }); + + let params: Vec<_> = outer_params + .into_iter() + .filter_map(|p| p.as_template_param(ctx, &())) + .collect(); + if params + .iter() + .any(|p| ctx.resolve_type(*p).is_invalid_type_param()) + { + warn!( + "Item contained invalid template \ + parameter: {:?}", + item + ); + return; + } + let params: Vec<_> = params + .iter() + .map(|p| { + p.try_to_rust_ty(ctx, &()).expect( + "type parameters can always convert to rust ty OK", + ) + }) + .collect(); + + if !params.is_empty() { + tokens.append_all(quote! { + < #( #params ),* > + }); + } + + let access_spec = + access_specifier(ctx.options().default_visibility); + tokens.append_all(match alias_style { + AliasVariation::TypeAlias => quote! { + = #inner_rust_type ; + }, + AliasVariation::NewType | AliasVariation::NewTypeDeref => { + quote! { + (#access_spec #inner_rust_type) ; + } + } + }); + + if alias_style == AliasVariation::NewTypeDeref { + let prefix = ctx.trait_prefix(); + tokens.append_all(quote! { + impl ::#prefix::ops::Deref for #rust_name { + type Target = #inner_rust_type; + #[inline] + fn deref(&self) -> &Self::Target { + &self.0 + } + } + impl ::#prefix::ops::DerefMut for #rust_name { + #[inline] + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } + } + }); + } + + result.push(tokens); + } + TypeKind::Enum(ref ei) => ei.codegen(ctx, result, item), + TypeKind::ObjCId | TypeKind::ObjCSel => { + result.saw_objc(); + } + TypeKind::ObjCInterface(ref interface) => { + interface.codegen(ctx, result, item) + } + ref u @ TypeKind::UnresolvedTypeRef(..) 
=> { + unreachable!("Should have been resolved after parsing {:?}!", u) + } + } + } +} + +struct Vtable<'a> { + item_id: ItemId, + /// A reference to the originating compound object. + #[allow(dead_code)] + comp_info: &'a CompInfo, +} + +impl<'a> Vtable<'a> { + fn new(item_id: ItemId, comp_info: &'a CompInfo) -> Self { + Vtable { item_id, comp_info } + } +} + +impl<'a> CodeGenerator for Vtable<'a> { + type Extra = Item; + type Return = (); + + fn codegen( + &self, + ctx: &BindgenContext, + result: &mut CodegenResult<'_>, + item: &Item, + ) { + assert_eq!(item.id(), self.item_id); + debug_assert!(item.is_enabled_for_codegen(ctx)); + let name = ctx.rust_ident(self.canonical_name(ctx)); + + // For now, we will only generate vtables for classes that: + // - do not inherit from others (compilers merge VTable from primary parent class). + // - do not contain a virtual destructor (requires ordering; platforms generate different vtables). + if ctx.options().vtable_generation && + self.comp_info.base_members().is_empty() && + self.comp_info.destructor().is_none() + { + let class_ident = ctx.rust_ident(self.item_id.canonical_name(ctx)); + + let methods = self + .comp_info + .methods() + .iter() + .filter_map(|m| { + if !m.is_virtual() { + return None; + } + + let function_item = ctx.resolve_item(m.signature()); + let function = function_item.expect_function(); + let signature_item = ctx.resolve_item(function.signature()); + let signature = match signature_item.expect_type().kind() { + TypeKind::Function(ref sig) => sig, + _ => panic!("Function signature type mismatch"), + }; + + // FIXME: Is there a canonical name without the class prepended? + let function_name = function_item.canonical_name(ctx); + + // FIXME: Need to account for overloading with times_seen (separately from regular function path). 
+ let function_name = ctx.rust_ident(function_name); + let mut args = utils::fnsig_arguments(ctx, signature); + let ret = utils::fnsig_return_ty(ctx, signature); + + args[0] = if m.is_const() { + quote! { this: *const #class_ident } + } else { + quote! { this: *mut #class_ident } + }; + + Some(quote! { + pub #function_name : unsafe extern "C" fn( #( #args ),* ) #ret + }) + }) + .collect::>(); + + result.push(quote! { + #[repr(C)] + pub struct #name { + #( #methods ),* + } + }) + } else { + // For the cases we don't support, simply generate an empty struct. + let void = helpers::ast_ty::c_void(ctx); + + result.push(quote! { + #[repr(C)] + pub struct #name ( #void ); + }); + } + } +} + +impl<'a> ItemCanonicalName for Vtable<'a> { + fn canonical_name(&self, ctx: &BindgenContext) -> String { + format!("{}__bindgen_vtable", self.item_id.canonical_name(ctx)) + } +} + +impl<'a> TryToRustTy for Vtable<'a> { + type Extra = (); + + fn try_to_rust_ty( + &self, + ctx: &BindgenContext, + _: &(), + ) -> error::Result { + let name = ctx.rust_ident(self.canonical_name(ctx)); + Ok(quote! { + #name + }) + } +} + +impl CodeGenerator for TemplateInstantiation { + type Extra = Item; + type Return = (); + + fn codegen( + &self, + ctx: &BindgenContext, + result: &mut CodegenResult<'_>, + item: &Item, + ) { + debug_assert!(item.is_enabled_for_codegen(ctx)); + + // Although uses of instantiations don't need code generation, and are + // just converted to rust types in fields, vars, etc, we take this + // opportunity to generate tests for their layout here. If the + // instantiation is opaque, then its presumably because we don't + // properly understand it (maybe because of specializations), and so we + // shouldn't emit layout tests either. 
+ if !ctx.options().layout_tests || self.is_opaque(ctx, item) { + return; + } + + // If there are any unbound type parameters, then we can't generate a + // layout test because we aren't dealing with a concrete type with a + // concrete size and alignment. + if ctx.uses_any_template_parameters(item.id()) { + return; + } + + let layout = item.kind().expect_type().layout(ctx); + + if let Some(layout) = layout { + let size = layout.size; + let align = layout.align; + + let name = item.full_disambiguated_name(ctx); + let mut fn_name = + format!("__bindgen_test_layout_{}_instantiation", name); + let times_seen = result.overload_number(&fn_name); + if times_seen > 0 { + write!(&mut fn_name, "_{}", times_seen).unwrap(); + } + + let fn_name = ctx.rust_ident_raw(fn_name); + + let prefix = ctx.trait_prefix(); + let ident = item.to_rust_ty_or_opaque(ctx, &()); + let size_of_expr = quote! { + ::#prefix::mem::size_of::<#ident>() + }; + let align_of_expr = quote! { + ::#prefix::mem::align_of::<#ident>() + }; + + let item = quote! { + #[test] + fn #fn_name() { + assert_eq!(#size_of_expr, #size, + concat!("Size of template specialization: ", + stringify!(#ident))); + assert_eq!(#align_of_expr, #align, + concat!("Alignment of template specialization: ", + stringify!(#ident))); + } + }; + + result.push(item); + } + } +} + +/// Trait for implementing the code generation of a struct or union field. 
+trait FieldCodegen<'a> { + type Extra; + + #[allow(clippy::too_many_arguments)] + fn codegen( + &self, + ctx: &BindgenContext, + visibility_kind: FieldVisibilityKind, + accessor_kind: FieldAccessorKind, + parent: &CompInfo, + result: &mut CodegenResult, + struct_layout: &mut StructLayoutTracker, + fields: &mut F, + methods: &mut M, + extra: Self::Extra, + ) where + F: Extend, + M: Extend; +} + +impl<'a> FieldCodegen<'a> for Field { + type Extra = (); + + fn codegen( + &self, + ctx: &BindgenContext, + visibility_kind: FieldVisibilityKind, + accessor_kind: FieldAccessorKind, + parent: &CompInfo, + result: &mut CodegenResult, + struct_layout: &mut StructLayoutTracker, + fields: &mut F, + methods: &mut M, + _: (), + ) where + F: Extend, + M: Extend, + { + match *self { + Field::DataMember(ref data) => { + data.codegen( + ctx, + visibility_kind, + accessor_kind, + parent, + result, + struct_layout, + fields, + methods, + (), + ); + } + Field::Bitfields(ref unit) => { + unit.codegen( + ctx, + visibility_kind, + accessor_kind, + parent, + result, + struct_layout, + fields, + methods, + (), + ); + } + } + } +} + +fn wrap_union_field_if_needed( + ctx: &BindgenContext, + struct_layout: &StructLayoutTracker, + ty: proc_macro2::TokenStream, + result: &mut CodegenResult, +) -> proc_macro2::TokenStream { + if struct_layout.is_rust_union() { + if struct_layout.can_copy_union_fields() { + ty + } else { + let prefix = ctx.trait_prefix(); + quote! { + ::#prefix::mem::ManuallyDrop<#ty> + } + } + } else { + result.saw_bindgen_union(); + if ctx.options().enable_cxx_namespaces { + quote! { + root::__BindgenUnionField<#ty> + } + } else { + quote! 
{ + __BindgenUnionField<#ty> + } + } + } +} + +impl<'a> FieldCodegen<'a> for FieldData { + type Extra = (); + + fn codegen( + &self, + ctx: &BindgenContext, + parent_visibility_kind: FieldVisibilityKind, + accessor_kind: FieldAccessorKind, + parent: &CompInfo, + result: &mut CodegenResult, + struct_layout: &mut StructLayoutTracker, + fields: &mut F, + methods: &mut M, + _: (), + ) where + F: Extend, + M: Extend, + { + // Bitfields are handled by `FieldCodegen` implementations for + // `BitfieldUnit` and `Bitfield`. + assert!(self.bitfield_width().is_none()); + + let field_item = + self.ty().into_resolver().through_type_refs().resolve(ctx); + let field_ty = field_item.expect_type(); + let mut ty = self.ty().to_rust_ty_or_opaque(ctx, &()); + ty.append_implicit_template_params(ctx, field_item); + + // NB: If supported, we use proper `union` types. + let ty = if parent.is_union() { + wrap_union_field_if_needed(ctx, struct_layout, ty, result) + } else if let Some(item) = field_ty.is_incomplete_array(ctx) { + result.saw_incomplete_array(); + + let inner = item.to_rust_ty_or_opaque(ctx, &()); + + if ctx.options().enable_cxx_namespaces { + quote! { + root::__IncompleteArrayField<#inner> + } + } else { + quote! { + __IncompleteArrayField<#inner> + } + } + } else { + ty + }; + + let mut field = quote! 
{}; + if ctx.options().generate_comments { + if let Some(raw_comment) = self.comment() { + let comment = ctx.options().process_comment(raw_comment); + field = attributes::doc(comment); + } + } + + let field_name = self + .name() + .map(|name| ctx.rust_mangle(name).into_owned()) + .expect("Each field should have a name in codegen!"); + let field_ident = ctx.rust_ident_raw(field_name.as_str()); + + if let Some(padding_field) = + struct_layout.saw_field(&field_name, field_ty, self.offset()) + { + fields.extend(Some(padding_field)); + } + + let visibility = compute_visibility( + ctx, + self.is_public(), + Some(self.annotations()), + parent_visibility_kind, + ); + let accessor_kind = + self.annotations().accessor_kind().unwrap_or(accessor_kind); + + match visibility { + FieldVisibilityKind::Private => { + field.append_all(quote! { + #field_ident : #ty , + }); + } + FieldVisibilityKind::PublicCrate => { + field.append_all(quote! { + pub(crate) #field_ident : #ty , + }); + } + FieldVisibilityKind::Public => { + field.append_all(quote! { + pub #field_ident : #ty , + }); + } + } + + fields.extend(Some(field)); + + // TODO: Factor the following code out, please! + if accessor_kind == FieldAccessorKind::None { + return; + } + + let getter_name = ctx.rust_ident_raw(format!("get_{}", field_name)); + let mutable_getter_name = + ctx.rust_ident_raw(format!("get_{}_mut", field_name)); + let field_name = ctx.rust_ident_raw(field_name); + + methods.extend(Some(match accessor_kind { + FieldAccessorKind::None => unreachable!(), + FieldAccessorKind::Regular => { + quote! { + #[inline] + pub fn #getter_name(&self) -> & #ty { + &self.#field_name + } + + #[inline] + pub fn #mutable_getter_name(&mut self) -> &mut #ty { + &mut self.#field_name + } + } + } + FieldAccessorKind::Unsafe => { + quote! 
{ + #[inline] + pub unsafe fn #getter_name(&self) -> & #ty { + &self.#field_name + } + + #[inline] + pub unsafe fn #mutable_getter_name(&mut self) -> &mut #ty { + &mut self.#field_name + } + } + } + FieldAccessorKind::Immutable => { + quote! { + #[inline] + pub fn #getter_name(&self) -> & #ty { + &self.#field_name + } + } + } + })); + } +} + +impl BitfieldUnit { + /// Get the constructor name for this bitfield unit. + fn ctor_name(&self) -> proc_macro2::TokenStream { + let ctor_name = Ident::new( + &format!("new_bitfield_{}", self.nth()), + Span::call_site(), + ); + quote! { + #ctor_name + } + } +} + +impl Bitfield { + /// Extend an under construction bitfield unit constructor with this + /// bitfield. This sets the relevant bits on the `__bindgen_bitfield_unit` + /// variable that's being constructed. + fn extend_ctor_impl( + &self, + ctx: &BindgenContext, + param_name: proc_macro2::TokenStream, + mut ctor_impl: proc_macro2::TokenStream, + ) -> proc_macro2::TokenStream { + let bitfield_ty = ctx.resolve_type(self.ty()); + let bitfield_ty_layout = bitfield_ty + .layout(ctx) + .expect("Bitfield without layout? Gah!"); + let bitfield_int_ty = helpers::integer_type(ctx, bitfield_ty_layout) + .expect( + "Should already have verified that the bitfield is \ + representable as an int", + ); + + let offset = self.offset_into_unit(); + let width = self.width() as u8; + let prefix = ctx.trait_prefix(); + + ctor_impl.append_all(quote! { + __bindgen_bitfield_unit.set( + #offset, + #width, + { + let #param_name: #bitfield_int_ty = unsafe { + ::#prefix::mem::transmute(#param_name) + }; + #param_name as u64 + } + ); + }); + + ctor_impl + } +} + +fn access_specifier( + visibility: FieldVisibilityKind, +) -> proc_macro2::TokenStream { + match visibility { + FieldVisibilityKind::Private => quote! {}, + FieldVisibilityKind::PublicCrate => quote! { pub(crate) }, + FieldVisibilityKind::Public => quote! 
{ pub }, + } +} + +/// Compute a fields or structs visibility based on multiple conditions. +/// 1. If the element was declared public, and we respect such CXX accesses specs +/// (context option) => By default Public, but this can be overruled by an `annotation`. +/// +/// 2. If the element was declared private, and we respect such CXX accesses specs +/// (context option) => By default Private, but this can be overruled by an `annotation`. +/// +/// 3. If we do not respect visibility modifiers, the result depends on the `annotation`, +/// if any, or the passed `default_kind`. +/// +fn compute_visibility( + ctx: &BindgenContext, + is_declared_public: bool, + annotations: Option<&Annotations>, + default_kind: FieldVisibilityKind, +) -> FieldVisibilityKind { + match ( + is_declared_public, + ctx.options().respect_cxx_access_specs, + annotations.and_then(|e| e.visibility_kind()), + ) { + (true, true, annotated_visibility) => { + // declared as public, cxx specs are respected + annotated_visibility.unwrap_or(FieldVisibilityKind::Public) + } + (false, true, annotated_visibility) => { + // declared as private, cxx specs are respected + annotated_visibility.unwrap_or(FieldVisibilityKind::Private) + } + (_, false, annotated_visibility) => { + // cxx specs are not respected, declaration does not matter. 
+ annotated_visibility.unwrap_or(default_kind) + } + } +} + +impl<'a> FieldCodegen<'a> for BitfieldUnit { + type Extra = (); + + fn codegen( + &self, + ctx: &BindgenContext, + visibility_kind: FieldVisibilityKind, + accessor_kind: FieldAccessorKind, + parent: &CompInfo, + result: &mut CodegenResult, + struct_layout: &mut StructLayoutTracker, + fields: &mut F, + methods: &mut M, + _: (), + ) where + F: Extend, + M: Extend, + { + use crate::ir::ty::RUST_DERIVE_IN_ARRAY_LIMIT; + + result.saw_bitfield_unit(); + + let layout = self.layout(); + let unit_field_ty = helpers::bitfield_unit(ctx, layout); + let field_ty = { + let unit_field_ty = unit_field_ty.clone(); + if parent.is_union() { + wrap_union_field_if_needed( + ctx, + struct_layout, + unit_field_ty, + result, + ) + } else { + unit_field_ty + } + }; + + { + let align_field_name = format!("_bitfield_align_{}", self.nth()); + let align_field_ident = ctx.rust_ident(align_field_name); + let align_ty = match self.layout().align { + n if n >= 8 => quote! { u64 }, + 4 => quote! { u32 }, + 2 => quote! { u16 }, + _ => quote! { u8 }, + }; + let access_spec = access_specifier(visibility_kind); + let align_field = quote! { + #access_spec #align_field_ident: [#align_ty; 0], + }; + fields.extend(Some(align_field)); + } + + let unit_field_name = format!("_bitfield_{}", self.nth()); + let unit_field_ident = ctx.rust_ident(&unit_field_name); + + let ctor_name = self.ctor_name(); + let mut ctor_params = vec![]; + let mut ctor_impl = quote! {}; + + // We cannot generate any constructor if the underlying storage can't + // implement AsRef<[u8]> / AsMut<[u8]> / etc, or can't derive Default. + // + // We don't check `larger_arrays` here because Default does still have + // the 32 items limitation. 
+ let mut generate_ctor = layout.size <= RUST_DERIVE_IN_ARRAY_LIMIT; + + let mut all_fields_declared_as_public = true; + for bf in self.bitfields() { + // Codegen not allowed for anonymous bitfields + if bf.name().is_none() { + continue; + } + + if layout.size > RUST_DERIVE_IN_ARRAY_LIMIT && + !ctx.options().rust_features().larger_arrays + { + continue; + } + + all_fields_declared_as_public &= bf.is_public(); + let mut bitfield_representable_as_int = true; + bf.codegen( + ctx, + visibility_kind, + accessor_kind, + parent, + result, + struct_layout, + fields, + methods, + (&unit_field_name, &mut bitfield_representable_as_int), + ); + + // Generating a constructor requires the bitfield to be representable as an integer. + if !bitfield_representable_as_int { + generate_ctor = false; + continue; + } + + let param_name = bitfield_getter_name(ctx, bf); + let bitfield_ty_item = ctx.resolve_item(bf.ty()); + let bitfield_ty = bitfield_ty_item.expect_type(); + let bitfield_ty = + bitfield_ty.to_rust_ty_or_opaque(ctx, bitfield_ty_item); + + ctor_params.push(quote! { + #param_name : #bitfield_ty + }); + ctor_impl = bf.extend_ctor_impl(ctx, param_name, ctor_impl); + } + + let visibility_kind = compute_visibility( + ctx, + all_fields_declared_as_public, + None, + visibility_kind, + ); + let access_spec = access_specifier(visibility_kind); + + let field = quote! { + #access_spec #unit_field_ident : #field_ty , + }; + fields.extend(Some(field)); + + if generate_ctor { + methods.extend(Some(quote! { + #[inline] + #access_spec fn #ctor_name ( #( #ctor_params ),* ) -> #unit_field_ty { + let mut __bindgen_bitfield_unit: #unit_field_ty = Default::default(); + #ctor_impl + __bindgen_bitfield_unit + } + })); + } + + struct_layout.saw_bitfield_unit(layout); + } +} + +fn bitfield_getter_name( + ctx: &BindgenContext, + bitfield: &Bitfield, +) -> proc_macro2::TokenStream { + let name = bitfield.getter_name(); + let name = ctx.rust_ident_raw(name); + quote! 
{ #name } +} + +fn bitfield_setter_name( + ctx: &BindgenContext, + bitfield: &Bitfield, +) -> proc_macro2::TokenStream { + let setter = bitfield.setter_name(); + let setter = ctx.rust_ident_raw(setter); + quote! { #setter } +} + +impl<'a> FieldCodegen<'a> for Bitfield { + type Extra = (&'a str, &'a mut bool); + + fn codegen( + &self, + ctx: &BindgenContext, + visibility_kind: FieldVisibilityKind, + _accessor_kind: FieldAccessorKind, + parent: &CompInfo, + _result: &mut CodegenResult, + struct_layout: &mut StructLayoutTracker, + _fields: &mut F, + methods: &mut M, + (unit_field_name, bitfield_representable_as_int): (&'a str, &mut bool), + ) where + F: Extend, + M: Extend, + { + let prefix = ctx.trait_prefix(); + let getter_name = bitfield_getter_name(ctx, self); + let setter_name = bitfield_setter_name(ctx, self); + let unit_field_ident = Ident::new(unit_field_name, Span::call_site()); + + let bitfield_ty_item = ctx.resolve_item(self.ty()); + let bitfield_ty = bitfield_ty_item.expect_type(); + + let bitfield_ty_layout = bitfield_ty + .layout(ctx) + .expect("Bitfield without layout? Gah!"); + let bitfield_int_ty = + match helpers::integer_type(ctx, bitfield_ty_layout) { + Some(int_ty) => { + *bitfield_representable_as_int = true; + int_ty + } + None => { + *bitfield_representable_as_int = false; + return; + } + }; + + let bitfield_ty = + bitfield_ty.to_rust_ty_or_opaque(ctx, bitfield_ty_item); + + let offset = self.offset_into_unit(); + let width = self.width() as u8; + + let visibility_kind = compute_visibility( + ctx, + self.is_public(), + Some(self.annotations()), + visibility_kind, + ); + let access_spec = access_specifier(visibility_kind); + + if parent.is_union() && !struct_layout.is_rust_union() { + methods.extend(Some(quote! 
{ + #[inline] + #access_spec fn #getter_name(&self) -> #bitfield_ty { + unsafe { + ::#prefix::mem::transmute( + self.#unit_field_ident.as_ref().get(#offset, #width) + as #bitfield_int_ty + ) + } + } + + #[inline] + #access_spec fn #setter_name(&mut self, val: #bitfield_ty) { + unsafe { + let val: #bitfield_int_ty = ::#prefix::mem::transmute(val); + self.#unit_field_ident.as_mut().set( + #offset, + #width, + val as u64 + ) + } + } + })); + } else { + methods.extend(Some(quote! { + #[inline] + #access_spec fn #getter_name(&self) -> #bitfield_ty { + unsafe { + ::#prefix::mem::transmute( + self.#unit_field_ident.get(#offset, #width) + as #bitfield_int_ty + ) + } + } + + #[inline] + #access_spec fn #setter_name(&mut self, val: #bitfield_ty) { + unsafe { + let val: #bitfield_int_ty = ::#prefix::mem::transmute(val); + self.#unit_field_ident.set( + #offset, + #width, + val as u64 + ) + } + } + })); + } + } +} + +impl CodeGenerator for CompInfo { + type Extra = Item; + type Return = (); + + fn codegen( + &self, + ctx: &BindgenContext, + result: &mut CodegenResult<'_>, + item: &Item, + ) { + debug!("::codegen: item = {:?}", item); + debug_assert!(item.is_enabled_for_codegen(ctx)); + + // Don't output classes with template parameters that aren't types, and + // also don't output template specializations, neither total or partial. + if self.has_non_type_template_params() { + return; + } + + let ty = item.expect_type(); + let layout = ty.layout(ctx); + let mut packed = self.is_packed(ctx, layout.as_ref()); + + let canonical_name = item.canonical_name(ctx); + let canonical_ident = ctx.rust_ident(&canonical_name); + + // Generate the vtable from the method list if appropriate. + // + // TODO: I don't know how this could play with virtual methods that are + // not in the list of methods found by us, we'll see. Also, could the + // order of the vtable pointers vary? 
+ // + // FIXME: Once we generate proper vtables, we need to codegen the + // vtable, but *not* generate a field for it in the case that + // HasVtable::has_vtable_ptr is false but HasVtable::has_vtable is true. + // + // Also, we need to generate the vtable in such a way it "inherits" from + // the parent too. + let is_opaque = item.is_opaque(ctx, &()); + let mut fields = vec![]; + let mut struct_layout = + StructLayoutTracker::new(ctx, self, ty, &canonical_name); + + if !is_opaque { + if item.has_vtable_ptr(ctx) { + let vtable = Vtable::new(item.id(), self); + vtable.codegen(ctx, result, item); + + let vtable_type = vtable + .try_to_rust_ty(ctx, &()) + .expect("vtable to Rust type conversion is infallible") + .to_ptr(true); + + fields.push(quote! { + pub vtable_: #vtable_type , + }); + + struct_layout.saw_vtable(); + } + + for base in self.base_members() { + if !base.requires_storage(ctx) { + continue; + } + + let inner_item = ctx.resolve_item(base.ty); + let mut inner = inner_item.to_rust_ty_or_opaque(ctx, &()); + inner.append_implicit_template_params(ctx, inner_item); + let field_name = ctx.rust_ident(&base.field_name); + + struct_layout.saw_base(inner_item.expect_type()); + + let visibility = match ( + base.is_public(), + ctx.options().respect_cxx_access_specs, + ) { + (true, true) => FieldVisibilityKind::Public, + (false, true) => FieldVisibilityKind::Private, + _ => ctx.options().default_visibility, + }; + + let access_spec = access_specifier(visibility); + fields.push(quote! 
{ + #access_spec #field_name: #inner, + }); + } + } + + let mut methods = vec![]; + if !is_opaque { + let visibility = item + .annotations() + .visibility_kind() + .unwrap_or(ctx.options().default_visibility); + let struct_accessor_kind = item + .annotations() + .accessor_kind() + .unwrap_or(FieldAccessorKind::None); + for field in self.fields() { + field.codegen( + ctx, + visibility, + struct_accessor_kind, + self, + result, + &mut struct_layout, + &mut fields, + &mut methods, + (), + ); + } + // Check whether an explicit padding field is needed + // at the end. + if let Some(comp_layout) = layout { + fields.extend( + struct_layout + .add_tail_padding(&canonical_name, comp_layout), + ); + } + } + + if is_opaque { + // Opaque item should not have generated methods, fields. + debug_assert!(fields.is_empty()); + debug_assert!(methods.is_empty()); + } + + let is_union = self.kind() == CompKind::Union; + let layout = item.kind().expect_type().layout(ctx); + let zero_sized = item.is_zero_sized(ctx); + let forward_decl = self.is_forward_declaration(); + + let mut explicit_align = None; + + // C++ requires every struct to be addressable, so what C++ compilers do + // is making the struct 1-byte sized. + // + // This is apparently not the case for C, see: + // https://github.com/rust-lang/rust-bindgen/issues/551 + // + // Just get the layout, and assume C++ if not. + // + // NOTE: This check is conveniently here to avoid the dummy fields we + // may add for unused template parameters. + if !forward_decl && zero_sized { + let has_address = if is_opaque { + // Generate the address field if it's an opaque type and + // couldn't determine the layout of the blob. + layout.is_none() + } else { + layout.map_or(true, |l| l.size != 0) + }; + + if has_address { + let layout = Layout::new(1, 1); + let ty = helpers::blob(ctx, Layout::new(1, 1)); + struct_layout.saw_field_with_layout( + "_address", + layout, + /* offset = */ Some(0), + ); + fields.push(quote! 
{ + pub _address: #ty, + }); + } + } + + if is_opaque { + match layout { + Some(l) => { + explicit_align = Some(l.align); + + let ty = helpers::blob(ctx, l); + fields.push(quote! { + pub _bindgen_opaque_blob: #ty , + }); + } + None => { + warn!("Opaque type without layout! Expect dragons!"); + } + } + } else if !is_union && !zero_sized { + if let Some(padding_field) = + layout.and_then(|layout| struct_layout.pad_struct(layout)) + { + fields.push(padding_field); + } + + if let Some(layout) = layout { + if struct_layout.requires_explicit_align(layout) { + if layout.align == 1 { + packed = true; + } else { + explicit_align = Some(layout.align); + if !ctx.options().rust_features.repr_align { + let ty = helpers::blob( + ctx, + Layout::new(0, layout.align), + ); + fields.push(quote! { + pub __bindgen_align: #ty , + }); + } + } + } + } + } else if is_union && !forward_decl { + // TODO(emilio): It'd be nice to unify this with the struct path + // above somehow. + let layout = layout.expect("Unable to get layout information?"); + if struct_layout.requires_explicit_align(layout) { + explicit_align = Some(layout.align); + } + + if !struct_layout.is_rust_union() { + let ty = helpers::blob(ctx, layout); + fields.push(quote! { + pub bindgen_union_field: #ty , + }) + } + } + + if forward_decl { + fields.push(quote! { + _unused: [u8; 0], + }); + } + + let mut generic_param_names = vec![]; + + for (idx, ty) in item.used_template_params(ctx).iter().enumerate() { + let param = ctx.resolve_type(*ty); + let name = param.name().unwrap(); + let ident = ctx.rust_ident(name); + generic_param_names.push(ident.clone()); + + let prefix = ctx.trait_prefix(); + let field_name = ctx.rust_ident(format!("_phantom_{}", idx)); + fields.push(quote! { + pub #field_name : ::#prefix::marker::PhantomData< + ::#prefix::cell::UnsafeCell<#ident> + > , + }); + } + + let generics = if !generic_param_names.is_empty() { + let generic_param_names = generic_param_names.clone(); + quote! 
{ + < #( #generic_param_names ),* > + } + } else { + quote! {} + }; + + let mut attributes = vec![]; + let mut needs_clone_impl = false; + let mut needs_default_impl = false; + let mut needs_debug_impl = false; + let mut needs_partialeq_impl = false; + if let Some(comment) = item.comment(ctx) { + attributes.push(attributes::doc(comment)); + } + if packed && !is_opaque { + let n = layout.map_or(1, |l| l.align); + assert!(ctx.options().rust_features().repr_packed_n || n == 1); + let packed_repr = if n == 1 { + "packed".to_string() + } else { + format!("packed({})", n) + }; + attributes.push(attributes::repr_list(&["C", &packed_repr])); + } else { + attributes.push(attributes::repr("C")); + } + + if ctx.options().rust_features().repr_align { + if let Some(explicit) = explicit_align { + // Ensure that the struct has the correct alignment even in + // presence of alignas. + let explicit = helpers::ast_ty::int_expr(explicit as i64); + attributes.push(quote! { + #[repr(align(#explicit))] + }); + } + } + + let derivable_traits = derives_of_item(item, ctx, packed); + if !derivable_traits.contains(DerivableTraits::DEBUG) { + needs_debug_impl = ctx.options().derive_debug && + ctx.options().impl_debug && + !ctx.no_debug_by_name(item) && + !item.annotations().disallow_debug(); + } + + if !derivable_traits.contains(DerivableTraits::DEFAULT) { + needs_default_impl = ctx.options().derive_default && + !self.is_forward_declaration() && + !ctx.no_default_by_name(item) && + !item.annotations().disallow_default(); + } + + let all_template_params = item.all_template_params(ctx); + + if derivable_traits.contains(DerivableTraits::COPY) && + !derivable_traits.contains(DerivableTraits::CLONE) + { + needs_clone_impl = true; + } + + if !derivable_traits.contains(DerivableTraits::PARTIAL_EQ) { + needs_partialeq_impl = ctx.options().derive_partialeq && + ctx.options().impl_partialeq && + ctx.lookup_can_derive_partialeq_or_partialord(item.id()) == + CanDerive::Manually; + } + + let mut derives: 
Vec<_> = derivable_traits.into(); + derives.extend(item.annotations().derives().iter().map(String::as_str)); + + let is_rust_union = is_union && struct_layout.is_rust_union(); + + // The custom derives callback may return a list of derive attributes; + // add them to the end of the list. + let custom_derives = ctx.options().all_callbacks(|cb| { + cb.add_derives(&DeriveInfo { + name: &canonical_name, + kind: if is_rust_union { + DeriveTypeKind::Union + } else { + DeriveTypeKind::Struct + }, + }) + }); + // In most cases this will be a no-op, since custom_derives will be empty. + derives.extend(custom_derives.iter().map(|s| s.as_str())); + + if !derives.is_empty() { + attributes.push(attributes::derives(&derives)) + } + + if item.must_use(ctx) { + attributes.push(attributes::must_use()); + } + + let mut tokens = if is_rust_union { + quote! { + #( #attributes )* + pub union #canonical_ident + } + } else { + quote! { + #( #attributes )* + pub struct #canonical_ident + } + }; + + tokens.append_all(quote! { + #generics { + #( #fields )* + } + }); + result.push(tokens); + + // Generate the inner types and all that stuff. + // + // TODO: In the future we might want to be smart, and use nested + // modules, and whatnot. + for ty in self.inner_types() { + let child_item = ctx.resolve_item(*ty); + // assert_eq!(child_item.parent_id(), item.id()); + child_item.codegen(ctx, result, &()); + } + + // NOTE: Some unexposed attributes (like alignment attributes) may + // affect layout, so we're bad and pray to the gods for avoid sending + // all the tests to shit when parsing things like max_align_t. 
+ if self.found_unknown_attr() { + warn!( + "Type {} has an unknown attribute that may affect layout", + canonical_ident + ); + } + + if all_template_params.is_empty() { + if !is_opaque { + for var in self.inner_vars() { + ctx.resolve_item(*var).codegen(ctx, result, &()); + } + } + + if ctx.options().layout_tests && !self.is_forward_declaration() { + if let Some(layout) = layout { + let fn_name = + format!("bindgen_test_layout_{}", canonical_ident); + let fn_name = ctx.rust_ident_raw(fn_name); + let prefix = ctx.trait_prefix(); + let size_of_expr = quote! { + ::#prefix::mem::size_of::<#canonical_ident>() + }; + let align_of_expr = quote! { + ::#prefix::mem::align_of::<#canonical_ident>() + }; + let size = layout.size; + let align = layout.align; + + let check_struct_align = if align > + ctx.target_pointer_size() && + !ctx.options().rust_features().repr_align + { + None + } else { + Some(quote! { + assert_eq!(#align_of_expr, + #align, + concat!("Alignment of ", stringify!(#canonical_ident))); + + }) + }; + + let should_skip_field_offset_checks = is_opaque; + + let check_field_offset = if should_skip_field_offset_checks + { + vec![] + } else { + self.fields() + .iter() + .filter_map(|field| match *field { + Field::DataMember(ref f) if f.name().is_some() => Some(f), + _ => None, + }) + .flat_map(|field| { + let name = field.name().unwrap(); + field.offset().map(|offset| { + let field_offset = offset / 8; + let field_name = ctx.rust_ident(name); + quote! { + assert_eq!( + unsafe { + ::#prefix::ptr::addr_of!((*ptr).#field_name) as usize - ptr as usize + }, + #field_offset, + concat!("Offset of field: ", stringify!(#canonical_ident), "::", stringify!(#field_name)) + ); + } + }) + }) + .collect() + }; + + let uninit_decl = if !check_field_offset.is_empty() { + // FIXME: When MSRV >= 1.59.0, we can use + // > const PTR: *const #canonical_ident = ::#prefix::mem::MaybeUninit::uninit().as_ptr(); + Some(quote! 
{ + // Use a shared MaybeUninit so that rustc with + // opt-level=0 doesn't take too much stack space, + // see #2218. + const UNINIT: ::#prefix::mem::MaybeUninit<#canonical_ident> = ::#prefix::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + }) + } else { + None + }; + + let item = quote! { + #[test] + fn #fn_name() { + #uninit_decl + assert_eq!(#size_of_expr, + #size, + concat!("Size of: ", stringify!(#canonical_ident))); + #check_struct_align + #( #check_field_offset )* + } + }; + result.push(item); + } + } + + let mut method_names = Default::default(); + if ctx.options().codegen_config.methods() { + for method in self.methods() { + assert!(method.kind() != MethodKind::Constructor); + method.codegen_method( + ctx, + &mut methods, + &mut method_names, + result, + self, + ); + } + } + + if ctx.options().codegen_config.constructors() { + for sig in self.constructors() { + Method::new( + MethodKind::Constructor, + *sig, + /* const */ + false, + ) + .codegen_method( + ctx, + &mut methods, + &mut method_names, + result, + self, + ); + } + } + + if ctx.options().codegen_config.destructors() { + if let Some((kind, destructor)) = self.destructor() { + debug_assert!(kind.is_destructor()); + Method::new(kind, destructor, false).codegen_method( + ctx, + &mut methods, + &mut method_names, + result, + self, + ); + } + } + } + + // NB: We can't use to_rust_ty here since for opaque types this tries to + // use the specialization knowledge to generate a blob field. + let ty_for_impl = quote! { + #canonical_ident #generics + }; + + if needs_clone_impl { + result.push(quote! { + impl #generics Clone for #ty_for_impl { + fn clone(&self) -> Self { *self } + } + }); + } + + if needs_default_impl { + let prefix = ctx.trait_prefix(); + let body = if ctx.options().rust_features().maybe_uninit { + quote! { + let mut s = ::#prefix::mem::MaybeUninit::::uninit(); + unsafe { + ::#prefix::ptr::write_bytes(s.as_mut_ptr(), 0, 1); + s.assume_init() + } + } + } else { + quote! 
{ + unsafe { + let mut s: Self = ::#prefix::mem::uninitialized(); + ::#prefix::ptr::write_bytes(&mut s, 0, 1); + s + } + } + }; + // Note we use `ptr::write_bytes()` instead of `mem::zeroed()` because the latter does + // not necessarily ensure padding bytes are zeroed. Some C libraries are sensitive to + // non-zero padding bytes, especially when forwards/backwards compatability is + // involved. + result.push(quote! { + impl #generics Default for #ty_for_impl { + fn default() -> Self { + #body + } + } + }); + } + + if needs_debug_impl { + let impl_ = impl_debug::gen_debug_impl( + ctx, + self.fields(), + item, + self.kind(), + ); + + let prefix = ctx.trait_prefix(); + + result.push(quote! { + impl #generics ::#prefix::fmt::Debug for #ty_for_impl { + #impl_ + } + }); + } + + if needs_partialeq_impl { + if let Some(impl_) = impl_partialeq::gen_partialeq_impl( + ctx, + self, + item, + &ty_for_impl, + ) { + let partialeq_bounds = if !generic_param_names.is_empty() { + let bounds = generic_param_names.iter().map(|t| { + quote! { #t: PartialEq } + }); + quote! { where #( #bounds ),* } + } else { + quote! {} + }; + + let prefix = ctx.trait_prefix(); + result.push(quote! { + impl #generics ::#prefix::cmp::PartialEq for #ty_for_impl #partialeq_bounds { + #impl_ + } + }); + } + } + + if !methods.is_empty() { + result.push(quote! { + impl #generics #ty_for_impl { + #( #methods )* + } + }); + } + } +} + +impl Method { + fn codegen_method( + &self, + ctx: &BindgenContext, + methods: &mut Vec, + method_names: &mut HashSet, + result: &mut CodegenResult<'_>, + _parent: &CompInfo, + ) { + assert!({ + let cc = &ctx.options().codegen_config; + match self.kind() { + MethodKind::Constructor => cc.constructors(), + MethodKind::Destructor => cc.destructors(), + MethodKind::VirtualDestructor { .. } => cc.destructors(), + MethodKind::Static | + MethodKind::Normal | + MethodKind::Virtual { .. } => cc.methods(), + } + }); + + // TODO(emilio): We could generate final stuff at least. 
+ if self.is_virtual() { + return; // FIXME + } + + // First of all, output the actual function. + let function_item = ctx.resolve_item(self.signature()); + if !function_item.process_before_codegen(ctx, result) { + return; + } + let function = function_item.expect_function(); + let times_seen = function.codegen(ctx, result, function_item); + let times_seen = match times_seen { + Some(seen) => seen, + None => return, + }; + let signature_item = ctx.resolve_item(function.signature()); + let mut name = match self.kind() { + MethodKind::Constructor => "new".into(), + MethodKind::Destructor => "destruct".into(), + _ => function.name().to_owned(), + }; + + let signature = match *signature_item.expect_type().kind() { + TypeKind::Function(ref sig) => sig, + _ => panic!("How in the world?"), + }; + + let supported_abi = match signature.abi(ctx, Some(&*name)) { + ClangAbi::Known(Abi::ThisCall) => { + ctx.options().rust_features().thiscall_abi + } + ClangAbi::Known(Abi::Vectorcall) => { + ctx.options().rust_features().vectorcall_abi + } + ClangAbi::Known(Abi::CUnwind) => { + ctx.options().rust_features().c_unwind_abi + } + ClangAbi::Known(Abi::EfiApi) => { + ctx.options().rust_features().abi_efiapi + } + _ => true, + }; + + if !supported_abi { + return; + } + + // Do not generate variadic methods, since rust does not allow + // implementing them, and we don't do a good job at it anyway. 
+ if signature.is_variadic() { + return; + } + + if method_names.contains(&name) { + let mut count = 1; + let mut new_name; + + while { + new_name = format!("{}{}", name, count); + method_names.contains(&new_name) + } { + count += 1; + } + + name = new_name; + } + + method_names.insert(name.clone()); + + let mut function_name = function_item.canonical_name(ctx); + if times_seen > 0 { + write!(&mut function_name, "{}", times_seen).unwrap(); + } + let function_name = ctx.rust_ident(function_name); + let mut args = utils::fnsig_arguments(ctx, signature); + let mut ret = utils::fnsig_return_ty(ctx, signature); + + if !self.is_static() && !self.is_constructor() { + args[0] = if self.is_const() { + quote! { &self } + } else { + quote! { &mut self } + }; + } + + // If it's a constructor, we always return `Self`, and we inject the + // "this" parameter, so there's no need to ask the user for it. + // + // Note that constructors in Clang are represented as functions with + // return-type = void. + if self.is_constructor() { + args.remove(0); + ret = quote! { -> Self }; + } + + let mut exprs = + helpers::ast_ty::arguments_from_signature(signature, ctx); + + let mut stmts = vec![]; + + // If it's a constructor, we need to insert an extra parameter with a + // variable called `__bindgen_tmp` we're going to create. + if self.is_constructor() { + let prefix = ctx.trait_prefix(); + let tmp_variable_decl = if ctx + .options() + .rust_features() + .maybe_uninit + { + exprs[0] = quote! { + __bindgen_tmp.as_mut_ptr() + }; + quote! { + let mut __bindgen_tmp = ::#prefix::mem::MaybeUninit::uninit() + } + } else { + exprs[0] = quote! { + &mut __bindgen_tmp + }; + quote! { + let mut __bindgen_tmp = ::#prefix::mem::uninitialized() + } + }; + stmts.push(tmp_variable_decl); + } else if !self.is_static() { + assert!(!exprs.is_empty()); + exprs[0] = quote! { + self + }; + }; + + let call = quote! 
{ + #function_name (#( #exprs ),* ) + }; + + stmts.push(call); + + if self.is_constructor() { + stmts.push(if ctx.options().rust_features().maybe_uninit { + quote! { + __bindgen_tmp.assume_init() + } + } else { + quote! { + __bindgen_tmp + } + }) + } + + let block = ctx.wrap_unsafe_ops(quote! ( #( #stmts );*)); + + let mut attrs = vec![attributes::inline()]; + + if signature.must_use() && + ctx.options().rust_features().must_use_function + { + attrs.push(attributes::must_use()); + } + + let name = ctx.rust_ident(&name); + methods.push(quote! { + #(#attrs)* + pub unsafe fn #name ( #( #args ),* ) #ret { + #block + } + }); + } +} + +/// A helper type that represents different enum variations. +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub enum EnumVariation { + /// The code for this enum will use a Rust enum. Note that creating this in unsafe code + /// (including FFI) with an invalid value will invoke undefined behaviour, whether or not + /// its marked as non_exhaustive. + Rust { + /// Indicates whether the generated struct should be `#[non_exhaustive]` + non_exhaustive: bool, + }, + /// The code for this enum will use a newtype + NewType { + /// Indicates whether the newtype will have bitwise operators + is_bitfield: bool, + /// Indicates whether the variants will be represented as global constants + is_global: bool, + }, + /// The code for this enum will use consts + Consts, + /// The code for this enum will use a module containing consts + ModuleConsts, +} + +impl EnumVariation { + fn is_rust(&self) -> bool { + matches!(*self, EnumVariation::Rust { .. }) + } + + /// Both the `Const` and `ModuleConsts` variants will cause this to return + /// true. 
+ fn is_const(&self) -> bool { + matches!(*self, EnumVariation::Consts | EnumVariation::ModuleConsts) + } +} + +impl Default for EnumVariation { + fn default() -> EnumVariation { + EnumVariation::Consts + } +} + +impl fmt::Display for EnumVariation { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let s = match self { + Self::Rust { + non_exhaustive: false, + } => "rust", + Self::Rust { + non_exhaustive: true, + } => "rust_non_exhaustive", + Self::NewType { + is_bitfield: true, .. + } => "bitfield", + Self::NewType { + is_bitfield: false, + is_global, + } => { + if *is_global { + "newtype_global" + } else { + "newtype" + } + } + Self::Consts => "consts", + Self::ModuleConsts => "moduleconsts", + }; + s.fmt(f) + } +} + +impl std::str::FromStr for EnumVariation { + type Err = std::io::Error; + + /// Create a `EnumVariation` from a string. + fn from_str(s: &str) -> Result { + match s { + "rust" => Ok(EnumVariation::Rust { + non_exhaustive: false, + }), + "rust_non_exhaustive" => Ok(EnumVariation::Rust { + non_exhaustive: true, + }), + "bitfield" => Ok(EnumVariation::NewType { + is_bitfield: true, + is_global: false, + }), + "consts" => Ok(EnumVariation::Consts), + "moduleconsts" => Ok(EnumVariation::ModuleConsts), + "newtype" => Ok(EnumVariation::NewType { + is_bitfield: false, + is_global: false, + }), + "newtype_global" => Ok(EnumVariation::NewType { + is_bitfield: false, + is_global: true, + }), + _ => Err(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + concat!( + "Got an invalid EnumVariation. Accepted values ", + "are 'rust', 'rust_non_exhaustive', 'bitfield', 'consts',", + "'moduleconsts', 'newtype' and 'newtype_global'." + ), + )), + } + } +} + +/// A helper type to construct different enum variations. 
+enum EnumBuilder<'a> { + Rust { + attrs: Vec, + ident: Ident, + tokens: proc_macro2::TokenStream, + emitted_any_variants: bool, + }, + NewType { + canonical_name: &'a str, + tokens: proc_macro2::TokenStream, + is_bitfield: bool, + is_global: bool, + }, + Consts { + variants: Vec, + }, + ModuleConsts { + module_name: &'a str, + module_items: Vec, + }, +} + +impl<'a> EnumBuilder<'a> { + /// Returns true if the builder is for a rustified enum. + fn is_rust_enum(&self) -> bool { + matches!(*self, EnumBuilder::Rust { .. }) + } + + /// Create a new enum given an item builder, a canonical name, a name for + /// the representation, and which variation it should be generated as. + fn new( + name: &'a str, + mut attrs: Vec, + repr: proc_macro2::TokenStream, + enum_variation: EnumVariation, + has_typedef: bool, + ) -> Self { + let ident = Ident::new(name, Span::call_site()); + + match enum_variation { + EnumVariation::NewType { + is_bitfield, + is_global, + } => EnumBuilder::NewType { + canonical_name: name, + tokens: quote! { + #( #attrs )* + pub struct #ident (pub #repr); + }, + is_bitfield, + is_global, + }, + + EnumVariation::Rust { .. } => { + // `repr` is guaranteed to be Rustified in Enum::codegen + attrs.insert(0, quote! { #[repr( #repr )] }); + let tokens = quote!(); + EnumBuilder::Rust { + attrs, + ident, + tokens, + emitted_any_variants: false, + } + } + + EnumVariation::Consts => { + let mut variants = Vec::new(); + + if !has_typedef { + variants.push(quote! { + #( #attrs )* + pub type #ident = #repr; + }); + } + + EnumBuilder::Consts { variants } + } + + EnumVariation::ModuleConsts => { + let ident = Ident::new( + CONSTIFIED_ENUM_MODULE_REPR_NAME, + Span::call_site(), + ); + let type_definition = quote! { + #( #attrs )* + pub type #ident = #repr; + }; + + EnumBuilder::ModuleConsts { + module_name: name, + module_items: vec![type_definition], + } + } + } + } + + /// Add a variant to this enum. 
+ fn with_variant( + self, + ctx: &BindgenContext, + variant: &EnumVariant, + mangling_prefix: Option<&str>, + rust_ty: proc_macro2::TokenStream, + result: &mut CodegenResult<'_>, + is_ty_named: bool, + ) -> Self { + let variant_name = ctx.rust_mangle(variant.name()); + let is_rust_enum = self.is_rust_enum(); + let expr = match variant.val() { + EnumVariantValue::Boolean(v) if is_rust_enum => { + helpers::ast_ty::uint_expr(v as u64) + } + EnumVariantValue::Boolean(v) => quote!(#v), + EnumVariantValue::Signed(v) => helpers::ast_ty::int_expr(v), + EnumVariantValue::Unsigned(v) => helpers::ast_ty::uint_expr(v), + }; + + let mut doc = quote! {}; + if ctx.options().generate_comments { + if let Some(raw_comment) = variant.comment() { + let comment = ctx.options().process_comment(raw_comment); + doc = attributes::doc(comment); + } + } + + match self { + EnumBuilder::Rust { + attrs, + ident, + tokens, + emitted_any_variants: _, + } => { + let name = ctx.rust_ident(variant_name); + EnumBuilder::Rust { + attrs, + ident, + tokens: quote! { + #tokens + #doc + #name = #expr, + }, + emitted_any_variants: true, + } + } + + EnumBuilder::NewType { + canonical_name, + is_global, + .. + } => { + if ctx.options().rust_features().associated_const && + is_ty_named && + !is_global + { + let enum_ident = ctx.rust_ident(canonical_name); + let variant_ident = ctx.rust_ident(variant_name); + + result.push(quote! { + impl #enum_ident { + #doc + pub const #variant_ident : #rust_ty = #rust_ty ( #expr ); + } + }); + } else { + let ident = ctx.rust_ident(match mangling_prefix { + Some(prefix) => { + Cow::Owned(format!("{}_{}", prefix, variant_name)) + } + None => variant_name, + }); + result.push(quote! { + #doc + pub const #ident : #rust_ty = #rust_ty ( #expr ); + }); + } + + self + } + + EnumBuilder::Consts { .. 
} => { + let constant_name = match mangling_prefix { + Some(prefix) => { + Cow::Owned(format!("{}_{}", prefix, variant_name)) + } + None => variant_name, + }; + + let ident = ctx.rust_ident(constant_name); + result.push(quote! { + #doc + pub const #ident : #rust_ty = #expr ; + }); + + self + } + EnumBuilder::ModuleConsts { + module_name, + mut module_items, + } => { + let name = ctx.rust_ident(variant_name); + let ty = ctx.rust_ident(CONSTIFIED_ENUM_MODULE_REPR_NAME); + module_items.push(quote! { + #doc + pub const #name : #ty = #expr ; + }); + + EnumBuilder::ModuleConsts { + module_name, + module_items, + } + } + } + } + + fn build( + self, + ctx: &BindgenContext, + rust_ty: proc_macro2::TokenStream, + result: &mut CodegenResult<'_>, + ) -> proc_macro2::TokenStream { + match self { + EnumBuilder::Rust { + attrs, + ident, + tokens, + emitted_any_variants, + .. + } => { + let variants = if !emitted_any_variants { + quote!(__bindgen_cannot_repr_c_on_empty_enum = 0) + } else { + tokens + }; + + quote! { + #( #attrs )* + pub enum #ident { + #variants + } + } + } + EnumBuilder::NewType { + canonical_name, + tokens, + is_bitfield, + .. + } => { + if !is_bitfield { + return tokens; + } + + let rust_ty_name = ctx.rust_ident_raw(canonical_name); + let prefix = ctx.trait_prefix(); + + result.push(quote! { + impl ::#prefix::ops::BitOr<#rust_ty> for #rust_ty { + type Output = Self; + + #[inline] + fn bitor(self, other: Self) -> Self { + #rust_ty_name(self.0 | other.0) + } + } + }); + + result.push(quote! { + impl ::#prefix::ops::BitOrAssign for #rust_ty { + #[inline] + fn bitor_assign(&mut self, rhs: #rust_ty) { + self.0 |= rhs.0; + } + } + }); + + result.push(quote! { + impl ::#prefix::ops::BitAnd<#rust_ty> for #rust_ty { + type Output = Self; + + #[inline] + fn bitand(self, other: Self) -> Self { + #rust_ty_name(self.0 & other.0) + } + } + }); + + result.push(quote! 
{ + impl ::#prefix::ops::BitAndAssign for #rust_ty { + #[inline] + fn bitand_assign(&mut self, rhs: #rust_ty) { + self.0 &= rhs.0; + } + } + }); + + tokens + } + EnumBuilder::Consts { variants, .. } => quote! { #( #variants )* }, + EnumBuilder::ModuleConsts { + module_items, + module_name, + .. + } => { + let ident = ctx.rust_ident(module_name); + quote! { + pub mod #ident { + #( #module_items )* + } + } + } + } + } +} + +impl CodeGenerator for Enum { + type Extra = Item; + type Return = (); + + fn codegen( + &self, + ctx: &BindgenContext, + result: &mut CodegenResult<'_>, + item: &Item, + ) { + debug!("::codegen: item = {:?}", item); + debug_assert!(item.is_enabled_for_codegen(ctx)); + + let name = item.canonical_name(ctx); + let ident = ctx.rust_ident(&name); + let enum_ty = item.expect_type(); + let layout = enum_ty.layout(ctx); + let variation = self.computed_enum_variation(ctx, item); + + let repr_translated; + let repr = match self.repr().map(|repr| ctx.resolve_type(repr)) { + Some(repr) + if !ctx.options().translate_enum_integer_types && + !variation.is_rust() => + { + repr + } + repr => { + // An enum's integer type is translated to a native Rust + // integer type in 3 cases: + // * the enum is Rustified and we need a translated type for + // the repr attribute + // * the representation couldn't be determined from the C source + // * it was explicitly requested as a bindgen option + + let kind = match repr { + Some(repr) => match *repr.canonical_type(ctx).kind() { + TypeKind::Int(int_kind) => int_kind, + _ => panic!("Unexpected type as enum repr"), + }, + None => { + warn!( + "Guessing type of enum! Forward declarations of enums \ + shouldn't be legal!" 
+ ); + IntKind::Int + } + }; + + let signed = kind.is_signed(); + let size = layout + .map(|l| l.size) + .or_else(|| kind.known_size()) + .unwrap_or(0); + + let translated = match (signed, size) { + (true, 1) => IntKind::I8, + (false, 1) => IntKind::U8, + (true, 2) => IntKind::I16, + (false, 2) => IntKind::U16, + (true, 4) => IntKind::I32, + (false, 4) => IntKind::U32, + (true, 8) => IntKind::I64, + (false, 8) => IntKind::U64, + _ => { + warn!( + "invalid enum decl: signed: {}, size: {}", + signed, size + ); + IntKind::I32 + } + }; + + repr_translated = + Type::new(None, None, TypeKind::Int(translated), false); + &repr_translated + } + }; + + let mut attrs = vec![]; + + // TODO(emilio): Delegate this to the builders? + match variation { + EnumVariation::Rust { non_exhaustive } => { + if non_exhaustive && + ctx.options().rust_features().non_exhaustive + { + attrs.push(attributes::non_exhaustive()); + } else if non_exhaustive && + !ctx.options().rust_features().non_exhaustive + { + panic!("The rust target you're using doesn't seem to support non_exhaustive enums"); + } + } + EnumVariation::NewType { .. } => { + if ctx.options().rust_features.repr_transparent { + attrs.push(attributes::repr("transparent")); + } else { + attrs.push(attributes::repr("C")); + } + } + _ => {} + }; + + if let Some(comment) = item.comment(ctx) { + attrs.push(attributes::doc(comment)); + } + + if item.must_use(ctx) { + attrs.push(attributes::must_use()); + } + + if !variation.is_const() { + let packed = false; // Enums can't be packed in Rust. + let mut derives = derives_of_item(item, ctx, packed); + // For backwards compat, enums always derive + // Clone/Eq/PartialEq/Hash, even if we don't generate those by + // default. 
+ derives.insert( + DerivableTraits::CLONE | + DerivableTraits::HASH | + DerivableTraits::PARTIAL_EQ | + DerivableTraits::EQ, + ); + let mut derives: Vec<_> = derives.into(); + for derive in item.annotations().derives().iter() { + if !derives.contains(&derive.as_str()) { + derives.push(derive); + } + } + + // The custom derives callback may return a list of derive attributes; + // add them to the end of the list. + let custom_derives = ctx.options().all_callbacks(|cb| { + cb.add_derives(&DeriveInfo { + name: &name, + kind: DeriveTypeKind::Enum, + }) + }); + // In most cases this will be a no-op, since custom_derives will be empty. + derives.extend(custom_derives.iter().map(|s| s.as_str())); + + attrs.push(attributes::derives(&derives)); + } + + fn add_constant( + ctx: &BindgenContext, + enum_: &Type, + // Only to avoid recomputing every time. + enum_canonical_name: &Ident, + // May be the same as "variant" if it's because the + // enum is unnamed and we still haven't seen the + // value. + variant_name: &Ident, + referenced_name: &Ident, + enum_rust_ty: proc_macro2::TokenStream, + result: &mut CodegenResult<'_>, + ) { + let constant_name = if enum_.name().is_some() { + if ctx.options().prepend_enum_name { + format!("{}_{}", enum_canonical_name, variant_name) + } else { + format!("{}", variant_name) + } + } else { + format!("{}", variant_name) + }; + let constant_name = ctx.rust_ident(constant_name); + + result.push(quote! { + pub const #constant_name : #enum_rust_ty = + #enum_canonical_name :: #referenced_name ; + }); + } + + let repr = repr.to_rust_ty_or_opaque(ctx, item); + let has_typedef = ctx.is_enum_typedef_combo(item.id()); + + let mut builder = + EnumBuilder::new(&name, attrs, repr, variation, has_typedef); + + // A map where we keep a value -> variant relation. 
+ let mut seen_values = HashMap::<_, Ident>::default(); + let enum_rust_ty = item.to_rust_ty_or_opaque(ctx, &()); + let is_toplevel = item.is_toplevel(ctx); + + // Used to mangle the constants we generate in the unnamed-enum case. + let parent_canonical_name = if is_toplevel { + None + } else { + Some(item.parent_id().canonical_name(ctx)) + }; + + let constant_mangling_prefix = if ctx.options().prepend_enum_name { + if enum_ty.name().is_none() { + parent_canonical_name.as_deref() + } else { + Some(&*name) + } + } else { + None + }; + + // NB: We defer the creation of constified variants, in case we find + // another variant with the same value (which is the common thing to + // do). + let mut constified_variants = VecDeque::new(); + + let mut iter = self.variants().iter().peekable(); + while let Some(variant) = + iter.next().or_else(|| constified_variants.pop_front()) + { + if variant.hidden() { + continue; + } + + if variant.force_constification() && iter.peek().is_some() { + constified_variants.push_back(variant); + continue; + } + + match seen_values.entry(variant.val()) { + Entry::Occupied(ref entry) => { + if variation.is_rust() { + let variant_name = ctx.rust_mangle(variant.name()); + let mangled_name = + if is_toplevel || enum_ty.name().is_some() { + variant_name + } else { + let parent_name = + parent_canonical_name.as_ref().unwrap(); + + Cow::Owned(format!( + "{}_{}", + parent_name, variant_name + )) + }; + + let existing_variant_name = entry.get(); + // Use associated constants for named enums. + if enum_ty.name().is_some() && + ctx.options().rust_features().associated_const + { + let enum_canonical_name = &ident; + let variant_name = + ctx.rust_ident_raw(&*mangled_name); + result.push(quote! 
{ + impl #enum_rust_ty { + pub const #variant_name : #enum_rust_ty = + #enum_canonical_name :: #existing_variant_name ; + } + }); + } else { + add_constant( + ctx, + enum_ty, + &ident, + &Ident::new(&mangled_name, Span::call_site()), + existing_variant_name, + enum_rust_ty.clone(), + result, + ); + } + } else { + builder = builder.with_variant( + ctx, + variant, + constant_mangling_prefix, + enum_rust_ty.clone(), + result, + enum_ty.name().is_some(), + ); + } + } + Entry::Vacant(entry) => { + builder = builder.with_variant( + ctx, + variant, + constant_mangling_prefix, + enum_rust_ty.clone(), + result, + enum_ty.name().is_some(), + ); + + let variant_name = ctx.rust_ident(variant.name()); + + // If it's an unnamed enum, or constification is enforced, + // we also generate a constant so it can be properly + // accessed. + if (variation.is_rust() && enum_ty.name().is_none()) || + variant.force_constification() + { + let mangled_name = if is_toplevel { + variant_name.clone() + } else { + let parent_name = + parent_canonical_name.as_ref().unwrap(); + + Ident::new( + &format!("{}_{}", parent_name, variant_name), + Span::call_site(), + ) + }; + + add_constant( + ctx, + enum_ty, + &ident, + &mangled_name, + &variant_name, + enum_rust_ty.clone(), + result, + ); + } + + entry.insert(variant_name); + } + } + } + + let item = builder.build(ctx, enum_rust_ty, result); + result.push(item); + } +} + +/// Enum for the default type of macro constants. 
+#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub enum MacroTypeVariation { + /// Use i32 or i64 + Signed, + /// Use u32 or u64 + Unsigned, +} + +impl fmt::Display for MacroTypeVariation { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let s = match self { + Self::Signed => "signed", + Self::Unsigned => "unsigned", + }; + s.fmt(f) + } +} + +impl Default for MacroTypeVariation { + fn default() -> MacroTypeVariation { + MacroTypeVariation::Unsigned + } +} + +impl std::str::FromStr for MacroTypeVariation { + type Err = std::io::Error; + + /// Create a `MacroTypeVariation` from a string. + fn from_str(s: &str) -> Result { + match s { + "signed" => Ok(MacroTypeVariation::Signed), + "unsigned" => Ok(MacroTypeVariation::Unsigned), + _ => Err(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + concat!( + "Got an invalid MacroTypeVariation. Accepted values ", + "are 'signed' and 'unsigned'" + ), + )), + } + } +} + +/// Enum for how aliases should be translated. +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub enum AliasVariation { + /// Convert to regular Rust alias + TypeAlias, + /// Create a new type by wrapping the old type in a struct and using #[repr(transparent)] + NewType, + /// Same as `NewType` but also impl Deref to be able to use the methods of the wrapped type + NewTypeDeref, +} + +impl fmt::Display for AliasVariation { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let s = match self { + Self::TypeAlias => "type_alias", + Self::NewType => "new_type", + Self::NewTypeDeref => "new_type_deref", + }; + + s.fmt(f) + } +} + +impl Default for AliasVariation { + fn default() -> AliasVariation { + AliasVariation::TypeAlias + } +} + +impl std::str::FromStr for AliasVariation { + type Err = std::io::Error; + + /// Create an `AliasVariation` from a string.
+ fn from_str(s: &str) -> Result { + match s { + "type_alias" => Ok(AliasVariation::TypeAlias), + "new_type" => Ok(AliasVariation::NewType), + "new_type_deref" => Ok(AliasVariation::NewTypeDeref), + _ => Err(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + concat!( + "Got an invalid AliasVariation. Accepted values ", + "are 'type_alias', 'new_type', and 'new_type_deref'" + ), + )), + } + } +} + +/// Enum for how non-`Copy` `union`s should be translated. +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub enum NonCopyUnionStyle { + /// Wrap members in a type generated by `bindgen`. + BindgenWrapper, + /// Wrap members in [`::core::mem::ManuallyDrop`]. + /// + /// Note: `ManuallyDrop` was stabilized in Rust 1.20.0, do not use it if your + /// MSRV is lower. + ManuallyDrop, +} + +impl fmt::Display for NonCopyUnionStyle { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let s = match self { + Self::BindgenWrapper => "bindgen_wrapper", + Self::ManuallyDrop => "manually_drop", + }; + + s.fmt(f) + } +} + +impl Default for NonCopyUnionStyle { + fn default() -> Self { + Self::BindgenWrapper + } +} + +impl std::str::FromStr for NonCopyUnionStyle { + type Err = std::io::Error; + + fn from_str(s: &str) -> Result { + match s { + "bindgen_wrapper" => Ok(Self::BindgenWrapper), + "manually_drop" => Ok(Self::ManuallyDrop), + _ => Err(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + concat!( + "Got an invalid NonCopyUnionStyle. Accepted values ", + "are 'bindgen_wrapper' and 'manually_drop'" + ), + )), + } + } +} + +/// Fallible conversion to an opaque blob. +/// +/// Implementors of this trait should provide the `try_get_layout` method to +/// fallibly get this thing's layout, which the provided `try_to_opaque` trait +/// method will use to convert the `Layout` into an opaque blob Rust type. +trait TryToOpaque { + type Extra; + + /// Get the layout for this thing, if one is available. 
+ fn try_get_layout( + &self, + ctx: &BindgenContext, + extra: &Self::Extra, + ) -> error::Result; + + /// Do not override this provided trait method. + fn try_to_opaque( + &self, + ctx: &BindgenContext, + extra: &Self::Extra, + ) -> error::Result { + self.try_get_layout(ctx, extra) + .map(|layout| helpers::blob(ctx, layout)) + } +} + +/// Infallible conversion of an IR thing to an opaque blob. +/// +/// The resulting layout is best effort, and is unfortunately not guaranteed to +/// be correct. When all else fails, we fall back to a single byte layout as a +/// last resort, because C++ does not permit zero-sized types. See the note in +/// the `ToRustTyOrOpaque` doc comment about fallible versus infallible traits +/// and when each is appropriate. +/// +/// Don't implement this directly. Instead implement `TryToOpaque`, and then +/// leverage the blanket impl for this trait. +trait ToOpaque: TryToOpaque { + fn get_layout(&self, ctx: &BindgenContext, extra: &Self::Extra) -> Layout { + self.try_get_layout(ctx, extra) + .unwrap_or_else(|_| Layout::for_size(ctx, 1)) + } + + fn to_opaque( + &self, + ctx: &BindgenContext, + extra: &Self::Extra, + ) -> proc_macro2::TokenStream { + let layout = self.get_layout(ctx, extra); + helpers::blob(ctx, layout) + } +} + +impl ToOpaque for T where T: TryToOpaque {} + +/// Fallible conversion from an IR thing to an *equivalent* Rust type. +/// +/// If the C/C++ construct represented by the IR thing cannot (currently) be +/// represented in Rust (for example, instantiations of templates with +/// const-value generic parameters) then the impl should return an `Err`. It +/// should *not* attempt to return an opaque blob with the correct size and +/// alignment. That is the responsibility of the `TryToOpaque` trait. 
+trait TryToRustTy { + type Extra; + + fn try_to_rust_ty( + &self, + ctx: &BindgenContext, + extra: &Self::Extra, + ) -> error::Result; +} + +/// Fallible conversion to a Rust type or an opaque blob with the correct size +/// and alignment. +/// +/// Don't implement this directly. Instead implement `TryToRustTy` and +/// `TryToOpaque`, and then leverage the blanket impl for this trait below. +trait TryToRustTyOrOpaque: TryToRustTy + TryToOpaque { + type Extra; + + fn try_to_rust_ty_or_opaque( + &self, + ctx: &BindgenContext, + extra: &::Extra, + ) -> error::Result; +} + +impl TryToRustTyOrOpaque for T +where + T: TryToRustTy + TryToOpaque, +{ + type Extra = E; + + fn try_to_rust_ty_or_opaque( + &self, + ctx: &BindgenContext, + extra: &E, + ) -> error::Result { + self.try_to_rust_ty(ctx, extra).or_else(|_| { + if let Ok(layout) = self.try_get_layout(ctx, extra) { + Ok(helpers::blob(ctx, layout)) + } else { + Err(error::Error::NoLayoutForOpaqueBlob) + } + }) + } +} + +/// Infallible conversion to a Rust type, or an opaque blob with a best effort +/// of correct size and alignment. +/// +/// Don't implement this directly. Instead implement `TryToRustTy` and +/// `TryToOpaque`, and then leverage the blanket impl for this trait below. +/// +/// ### Fallible vs. Infallible Conversions to Rust Types +/// +/// When should one use this infallible `ToRustTyOrOpaque` trait versus the +/// fallible `TryTo{RustTy, Opaque, RustTyOrOpaque}` traits? All fallible trait +/// implementations that need to convert another thing into a Rust type or +/// opaque blob in a nested manner should also use fallible trait methods and +/// propagate failure up the stack. Only infallible functions and methods like +/// CodeGenerator implementations should use the infallible +/// `ToRustTyOrOpaque`. The further out we push error recovery, the more likely +/// we are to get a usable `Layout` even if we can't generate an equivalent Rust +/// type for a C++ construct.
+trait ToRustTyOrOpaque: TryToRustTy + ToOpaque { + type Extra; + + fn to_rust_ty_or_opaque( + &self, + ctx: &BindgenContext, + extra: &::Extra, + ) -> proc_macro2::TokenStream; +} + +impl ToRustTyOrOpaque for T +where + T: TryToRustTy + ToOpaque, +{ + type Extra = E; + + fn to_rust_ty_or_opaque( + &self, + ctx: &BindgenContext, + extra: &E, + ) -> proc_macro2::TokenStream { + self.try_to_rust_ty(ctx, extra) + .unwrap_or_else(|_| self.to_opaque(ctx, extra)) + } +} + +impl TryToOpaque for T +where + T: Copy + Into, +{ + type Extra = (); + + fn try_get_layout( + &self, + ctx: &BindgenContext, + _: &(), + ) -> error::Result { + ctx.resolve_item((*self).into()).try_get_layout(ctx, &()) + } +} + +impl TryToRustTy for T +where + T: Copy + Into, +{ + type Extra = (); + + fn try_to_rust_ty( + &self, + ctx: &BindgenContext, + _: &(), + ) -> error::Result { + ctx.resolve_item((*self).into()).try_to_rust_ty(ctx, &()) + } +} + +impl TryToOpaque for Item { + type Extra = (); + + fn try_get_layout( + &self, + ctx: &BindgenContext, + _: &(), + ) -> error::Result { + self.kind().expect_type().try_get_layout(ctx, self) + } +} + +impl TryToRustTy for Item { + type Extra = (); + + fn try_to_rust_ty( + &self, + ctx: &BindgenContext, + _: &(), + ) -> error::Result { + self.kind().expect_type().try_to_rust_ty(ctx, self) + } +} + +impl TryToOpaque for Type { + type Extra = Item; + + fn try_get_layout( + &self, + ctx: &BindgenContext, + _: &Item, + ) -> error::Result { + self.layout(ctx).ok_or(error::Error::NoLayoutForOpaqueBlob) + } +} + +impl TryToRustTy for Type { + type Extra = Item; + + fn try_to_rust_ty( + &self, + ctx: &BindgenContext, + item: &Item, + ) -> error::Result { + use self::helpers::ast_ty::*; + + match *self.kind() { + TypeKind::Void => Ok(c_void(ctx)), + // TODO: we should do something smart with nullptr, or maybe *const + // c_void is enough? + TypeKind::NullPtr => Ok(c_void(ctx).to_ptr(true)), + TypeKind::Int(ik) => { + match ik { + IntKind::Bool => Ok(quote! 
{ bool }), + IntKind::Char { .. } => Ok(raw_type(ctx, "c_char")), + IntKind::SChar => Ok(raw_type(ctx, "c_schar")), + IntKind::UChar => Ok(raw_type(ctx, "c_uchar")), + IntKind::Short => Ok(raw_type(ctx, "c_short")), + IntKind::UShort => Ok(raw_type(ctx, "c_ushort")), + IntKind::Int => Ok(raw_type(ctx, "c_int")), + IntKind::UInt => Ok(raw_type(ctx, "c_uint")), + IntKind::Long => Ok(raw_type(ctx, "c_long")), + IntKind::ULong => Ok(raw_type(ctx, "c_ulong")), + IntKind::LongLong => Ok(raw_type(ctx, "c_longlong")), + IntKind::ULongLong => Ok(raw_type(ctx, "c_ulonglong")), + IntKind::WChar => { + let layout = self + .layout(ctx) + .expect("Couldn't compute wchar_t's layout?"); + let ty = Layout::known_type_for_size(ctx, layout.size) + .expect("Non-representable wchar_t?"); + let ident = ctx.rust_ident_raw(ty); + Ok(quote! { #ident }) + } + + IntKind::I8 => Ok(quote! { i8 }), + IntKind::U8 => Ok(quote! { u8 }), + IntKind::I16 => Ok(quote! { i16 }), + IntKind::U16 => Ok(quote! { u16 }), + IntKind::I32 => Ok(quote! { i32 }), + IntKind::U32 => Ok(quote! { u32 }), + IntKind::I64 => Ok(quote! { i64 }), + IntKind::U64 => Ok(quote! { u64 }), + IntKind::Custom { name, .. } => { + Ok(proc_macro2::TokenStream::from_str(name).unwrap()) + } + IntKind::U128 => { + Ok(if ctx.options().rust_features.i128_and_u128 { + quote! { u128 } + } else { + // Best effort thing, but wrong alignment + // unfortunately. + quote! { [u64; 2] } + }) + } + IntKind::I128 => { + Ok(if ctx.options().rust_features.i128_and_u128 { + quote! { i128 } + } else { + quote! { [u64; 2] } + }) + } + } + } + TypeKind::Float(fk) => { + Ok(float_kind_rust_type(ctx, fk, self.layout(ctx))) + } + TypeKind::Complex(fk) => { + let float_path = + float_kind_rust_type(ctx, fk, self.layout(ctx)); + + ctx.generated_bindgen_complex(); + Ok(if ctx.options().enable_cxx_namespaces { + quote! { + root::__BindgenComplex<#float_path> + } + } else { + quote! 
{ + __BindgenComplex<#float_path> + } + }) + } + TypeKind::Function(ref fs) => { + // We can't rely on the sizeof(Option<NonZero<_>>) == + // sizeof(NonZero<_>) optimization with opaque blobs (because + // they aren't NonZero), so don't *ever* use an or_opaque + // variant here. + let ty = fs.try_to_rust_ty(ctx, &())?; + + let prefix = ctx.trait_prefix(); + Ok(quote! { + ::#prefix::option::Option<#ty> + }) + } + TypeKind::Array(item, len) | TypeKind::Vector(item, len) => { + let ty = item.try_to_rust_ty(ctx, &())?; + Ok(quote! { + [ #ty ; #len ] + }) + } + TypeKind::Enum(..) => { + let path = item.namespace_aware_canonical_path(ctx); + let path = proc_macro2::TokenStream::from_str(&path.join("::")) + .unwrap(); + Ok(quote!(#path)) + } + TypeKind::TemplateInstantiation(ref inst) => { + inst.try_to_rust_ty(ctx, item) + } + TypeKind::ResolvedTypeRef(inner) => inner.try_to_rust_ty(ctx, &()), + TypeKind::TemplateAlias(..) | + TypeKind::Alias(..) | + TypeKind::BlockPointer(..) => { + if self.is_block_pointer() && !ctx.options().generate_block { + let void = c_void(ctx); + return Ok(void.to_ptr(/* is_const = */ false)); + } + + if item.is_opaque(ctx, &()) && + item.used_template_params(ctx) + .into_iter() + .any(|param| param.is_template_param(ctx, &())) + { + self.try_to_opaque(ctx, item) + } else if let Some(ty) = self + .name() + .and_then(|name| utils::type_from_named(ctx, name)) + { + Ok(ty) + } else { + utils::build_path(item, ctx) + } + } + TypeKind::Comp(ref info) => { + let template_params = item.all_template_params(ctx); + if info.has_non_type_template_params() || + (item.is_opaque(ctx, &()) && !template_params.is_empty()) + { + return self.try_to_opaque(ctx, item); + } + + utils::build_path(item, ctx) + } + TypeKind::Opaque => self.try_to_opaque(ctx, item), + TypeKind::Pointer(inner) | TypeKind::Reference(inner) => { + let is_const = ctx.resolve_type(inner).is_const(); + + let inner = + inner.into_resolver().through_type_refs().resolve(ctx); + let inner_ty =
inner.expect_type(); + + let is_objc_pointer = + matches!(inner_ty.kind(), TypeKind::ObjCInterface(..)); + + // Regardless if we can properly represent the inner type, we + // should always generate a proper pointer here, so use + // infallible conversion of the inner type. + let mut ty = inner.to_rust_ty_or_opaque(ctx, &()); + ty.append_implicit_template_params(ctx, inner); + + // Avoid the first function pointer level, since it's already + // represented in Rust. + if inner_ty.canonical_type(ctx).is_function() || is_objc_pointer + { + Ok(ty) + } else { + Ok(ty.to_ptr(is_const)) + } + } + TypeKind::TypeParam => { + let name = item.canonical_name(ctx); + let ident = ctx.rust_ident(name); + Ok(quote! { + #ident + }) + } + TypeKind::ObjCSel => Ok(quote! { + objc::runtime::Sel + }), + TypeKind::ObjCId => Ok(quote! { + id + }), + TypeKind::ObjCInterface(ref interface) => { + let name = ctx.rust_ident(interface.name()); + Ok(quote! { + #name + }) + } + ref u @ TypeKind::UnresolvedTypeRef(..) => { + unreachable!("Should have been resolved after parsing {:?}!", u) + } + } + } +} + +impl TryToOpaque for TemplateInstantiation { + type Extra = Item; + + fn try_get_layout( + &self, + ctx: &BindgenContext, + item: &Item, + ) -> error::Result { + item.expect_type() + .layout(ctx) + .ok_or(error::Error::NoLayoutForOpaqueBlob) + } +} + +impl TryToRustTy for TemplateInstantiation { + type Extra = Item; + + fn try_to_rust_ty( + &self, + ctx: &BindgenContext, + item: &Item, + ) -> error::Result { + if self.is_opaque(ctx, item) { + return Err(error::Error::InstantiationOfOpaqueType); + } + + let def = self + .template_definition() + .into_resolver() + .through_type_refs() + .resolve(ctx); + + let mut ty = quote! 
{}; + let def_path = def.namespace_aware_canonical_path(ctx); + ty.append_separated( + def_path.into_iter().map(|p| ctx.rust_ident(p)), + quote!(::), + ); + + let def_params = def.self_template_params(ctx); + if def_params.is_empty() { + // This can happen if we generated an opaque type for a partial + // template specialization, and we've hit an instantiation of + // that partial specialization. + extra_assert!(def.is_opaque(ctx, &())); + return Err(error::Error::InstantiationOfOpaqueType); + } + + // TODO: If the definition type is a template class/struct + // definition's member template definition, it could rely on + // generic template parameters from its outer template + // class/struct. When we emit bindings for it, it could require + // *more* type arguments than we have here, and we will need to + // reconstruct them somehow. We don't have any means of doing + // that reconstruction at this time. + + let template_args = self + .template_arguments() + .iter() + .zip(def_params.iter()) + // Only pass type arguments for the type parameters that + // the def uses. + .filter(|&(_, param)| ctx.uses_template_parameter(def.id(), *param)) + .map(|(arg, _)| { + let arg = arg.into_resolver().through_type_refs().resolve(ctx); + let mut ty = arg.try_to_rust_ty(ctx, &())?; + ty.append_implicit_template_params(ctx, arg); + Ok(ty) + }) + .collect::>>()?; + + if template_args.is_empty() { + return Ok(ty); + } + + Ok(quote! { + #ty < #( #template_args ),* > + }) + } +} + +impl TryToRustTy for FunctionSig { + type Extra = (); + + fn try_to_rust_ty( + &self, + ctx: &BindgenContext, + _: &(), + ) -> error::Result { + // TODO: we might want to consider ignoring the reference return value. 
+ let ret = utils::fnsig_return_ty(ctx, self); + let arguments = utils::fnsig_arguments(ctx, self); + + match self.abi(ctx, None) { + ClangAbi::Known(Abi::ThisCall) + if !ctx.options().rust_features().thiscall_abi => + { + warn!("Skipping function with thiscall ABI that isn't supported by the configured Rust target"); + Ok(proc_macro2::TokenStream::new()) + } + ClangAbi::Known(Abi::Vectorcall) + if !ctx.options().rust_features().vectorcall_abi => + { + warn!("Skipping function with vectorcall ABI that isn't supported by the configured Rust target"); + Ok(proc_macro2::TokenStream::new()) + } + ClangAbi::Known(Abi::CUnwind) + if !ctx.options().rust_features().c_unwind_abi => + { + warn!("Skipping function with C-unwind ABI that isn't supported by the configured Rust target"); + Ok(proc_macro2::TokenStream::new()) + } + ClangAbi::Known(Abi::EfiApi) + if !ctx.options().rust_features().abi_efiapi => + { + warn!("Skipping function with efiapi ABI that isn't supported by the configured Rust target"); + Ok(proc_macro2::TokenStream::new()) + } + abi => Ok(quote! { + unsafe extern #abi fn ( #( #arguments ),* ) #ret + }), + } + } +} + +impl CodeGenerator for Function { + type Extra = Item; + + /// If we've actually generated the symbol, the number of times we've seen + /// it. + type Return = Option; + + fn codegen( + &self, + ctx: &BindgenContext, + result: &mut CodegenResult<'_>, + item: &Item, + ) -> Self::Return { + debug!("::codegen: item = {:?}", item); + debug_assert!(item.is_enabled_for_codegen(ctx)); + + let is_internal = matches!(self.linkage(), Linkage::Internal); + + if is_internal && !ctx.options().wrap_static_fns { + // We can't do anything with Internal functions if we are not wrapping them so just + // avoid generating anything for them. + return None; + } + + // Pure virtual methods have no actual symbol, so we can't generate + // something meaningful for them. 
+ let is_dynamic_function = match self.kind() { + FunctionKind::Method(ref method_kind) + if method_kind.is_pure_virtual() => + { + return None; + } + FunctionKind::Function => { + ctx.options().dynamic_library_name.is_some() + } + _ => false, + }; + + // Similar to static member variables in a class template, we can't + // generate bindings to template functions, because the set of + // instantiations is open ended and we have no way of knowing which + // monomorphizations actually exist. + if !item.all_template_params(ctx).is_empty() { + return None; + } + + let name = self.name(); + let mut canonical_name = item.canonical_name(ctx); + let mangled_name = self.mangled_name(); + + { + let seen_symbol_name = mangled_name.unwrap_or(&canonical_name); + + // TODO: Maybe warn here if there's a type/argument mismatch, or + // something? + if result.seen_function(seen_symbol_name) { + return None; + } + result.saw_function(seen_symbol_name); + } + + let signature_item = ctx.resolve_item(self.signature()); + let signature = signature_item.kind().expect_type().canonical_type(ctx); + let signature = match *signature.kind() { + TypeKind::Function(ref sig) => sig, + _ => panic!("Signature kind is not a Function: {:?}", signature), + }; + + let args = utils::fnsig_arguments(ctx, signature); + let ret = utils::fnsig_return_ty(ctx, signature); + + let mut attributes = vec![]; + + if ctx.options().rust_features().must_use_function { + let must_use = signature.must_use() || { + let ret_ty = signature + .return_type() + .into_resolver() + .through_type_refs() + .resolve(ctx); + ret_ty.must_use(ctx) + }; + + if must_use { + attributes.push(attributes::must_use()); + } + } + + if let Some(comment) = item.comment(ctx) { + attributes.push(attributes::doc(comment)); + } + + let abi = match signature.abi(ctx, Some(name)) { + ClangAbi::Known(Abi::ThisCall) + if !ctx.options().rust_features().thiscall_abi => + { + unsupported_abi_diagnostic::( + name, + item.location(), + "thiscall", + ctx, 
+ ); + return None; + } + ClangAbi::Known(Abi::Vectorcall) + if !ctx.options().rust_features().vectorcall_abi => + { + unsupported_abi_diagnostic::( + name, + item.location(), + "vectorcall", + ctx, + ); + return None; + } + ClangAbi::Known(Abi::CUnwind) + if !ctx.options().rust_features().c_unwind_abi => + { + unsupported_abi_diagnostic::( + name, + item.location(), + "C-unwind", + ctx, + ); + return None; + } + ClangAbi::Known(Abi::EfiApi) + if !ctx.options().rust_features().abi_efiapi => + { + unsupported_abi_diagnostic::( + name, + item.location(), + "efiapi", + ctx, + ); + return None; + } + ClangAbi::Known(Abi::Win64) if signature.is_variadic() => { + unsupported_abi_diagnostic::( + name, + item.location(), + "Win64", + ctx, + ); + return None; + } + ClangAbi::Unknown(unknown_abi) => { + panic!( + "Invalid or unknown abi {:?} for function {:?} ({:?})", + unknown_abi, canonical_name, self + ); + } + abi => abi, + }; + + if is_internal && ctx.options().wrap_static_fns { + result.items_to_serialize.push(item.id()); + } + + // Handle overloaded functions by giving each overload its own unique + // suffix. + let times_seen = result.overload_number(&canonical_name); + if times_seen > 0 { + write!(&mut canonical_name, "{}", times_seen).unwrap(); + } + + let mut has_link_name_attr = false; + if let Some(link_name) = self.link_name() { + attributes.push(attributes::link_name::(link_name)); + has_link_name_attr = true; + } else { + let link_name = mangled_name.unwrap_or(name); + if !is_dynamic_function && + !utils::names_will_be_identical_after_mangling( + &canonical_name, + link_name, + Some(abi), + ) + { + attributes.push(attributes::link_name::(link_name)); + has_link_name_attr = true; + } + } + + // Unfortunately this can't piggyback on the `attributes` list because + // the #[link(wasm_import_module)] needs to happen before the `extern + // "C"` block. 
It doesn't get picked up properly otherwise + let wasm_link_attribute = + ctx.options().wasm_import_module_name.as_ref().map(|name| { + quote! { #[link(wasm_import_module = #name)] } + }); + + if is_internal && ctx.options().wrap_static_fns && !has_link_name_attr { + let name = canonical_name.clone() + ctx.wrap_static_fns_suffix(); + attributes.push(attributes::link_name::(&name)); + } + + let ident = ctx.rust_ident(canonical_name); + let tokens = quote! { + #wasm_link_attribute + extern #abi { + #(#attributes)* + pub fn #ident ( #( #args ),* ) #ret; + } + }; + + // If we're doing dynamic binding generation, add to the dynamic items. + if is_dynamic_function { + let args_identifiers = + utils::fnsig_argument_identifiers(ctx, signature); + let ret_ty = utils::fnsig_return_ty(ctx, signature); + result.dynamic_items().push( + ident, + abi, + signature.is_variadic(), + ctx.options().dynamic_link_require_all, + args, + args_identifiers, + ret, + ret_ty, + attributes, + ctx, + ); + } else { + result.push(tokens); + } + Some(times_seen) + } +} + +fn unsupported_abi_diagnostic( + fn_name: &str, + _location: Option<&crate::clang::SourceLocation>, + abi: &str, + _ctx: &BindgenContext, +) { + warn!( + "Skipping {}function `{}` with the {} ABI that isn't supported by the configured Rust target", + if VARIADIC { "variadic " } else { "" }, + fn_name, + abi + ); + + #[cfg(feature = "experimental")] + if _ctx.options().emit_diagnostics { + use crate::diagnostics::{get_line, Diagnostic, Level, Slice}; + + let mut diag = Diagnostic::default(); + diag + .with_title(format!( + "The `{}` {}function uses the {} ABI which is not supported by the configured Rust target.", + fn_name, + if VARIADIC { "variadic " } else { "" }, + abi), Level::Warn) + .add_annotation("No code will be generated for this function.", Level::Warn) + .add_annotation(format!("The configured Rust version is {}.", String::from(_ctx.options().rust_target)), Level::Note); + + if let Some(loc) = _location { + let (file, 
line, col, _) = loc.location(); + + if let Some(filename) = file.name() { + if let Ok(Some(source)) = get_line(&filename, line) { + let mut slice = Slice::default(); + slice + .with_source(source) + .with_location(filename, line, col); + diag.add_slice(slice); + } + } + } + + diag.display() + } +} + +fn objc_method_codegen( + ctx: &BindgenContext, + method: &ObjCMethod, + methods: &mut Vec, + class_name: Option<&str>, + rust_class_name: &str, + prefix: &str, +) { + // This would ideally resolve the method into an Item, and use + // Item::process_before_codegen; however, ObjC methods are not currently + // made into function items. + let name = format!("{}::{}{}", rust_class_name, prefix, method.rust_name()); + if ctx.options().blocklisted_items.matches(name) { + return; + } + + let signature = method.signature(); + let fn_args = utils::fnsig_arguments(ctx, signature); + let fn_ret = utils::fnsig_return_ty(ctx, signature); + + let sig = if method.is_class_method() { + quote! { + ( #( #fn_args ),* ) #fn_ret + } + } else { + let self_arr = [quote! { &self }]; + let args = self_arr.iter().chain(fn_args.iter()); + quote! { + ( #( #args ),* ) #fn_ret + } + }; + + let methods_and_args = method.format_method_call(&fn_args); + + let body = { + let body = if method.is_class_method() { + let class_name = ctx.rust_ident( + class_name + .expect("Generating a class method without class name?"), + ); + quote!(msg_send!(class!(#class_name), #methods_and_args)) + } else { + quote!(msg_send!(*self, #methods_and_args)) + }; + + ctx.wrap_unsafe_ops(body) + }; + + let method_name = + ctx.rust_ident(format!("{}{}", prefix, method.rust_name())); + + methods.push(quote! 
{ + unsafe fn #method_name #sig where ::Target: objc::Message + Sized { + #body + } + }); +} + +impl CodeGenerator for ObjCInterface { + type Extra = Item; + type Return = (); + + fn codegen( + &self, + ctx: &BindgenContext, + result: &mut CodegenResult<'_>, + item: &Item, + ) { + debug_assert!(item.is_enabled_for_codegen(ctx)); + + let mut impl_items = vec![]; + let rust_class_name = item.path_for_allowlisting(ctx)[1..].join("::"); + + for method in self.methods() { + objc_method_codegen( + ctx, + method, + &mut impl_items, + None, + &rust_class_name, + "", + ); + } + + for class_method in self.class_methods() { + let ambiquity = self + .methods() + .iter() + .map(|m| m.rust_name()) + .any(|x| x == class_method.rust_name()); + let prefix = if ambiquity { "class_" } else { "" }; + objc_method_codegen( + ctx, + class_method, + &mut impl_items, + Some(self.name()), + &rust_class_name, + prefix, + ); + } + + let trait_name = ctx.rust_ident(self.rust_name()); + let trait_constraints = quote! { + Sized + std::ops::Deref + }; + let trait_block = if self.is_template() { + let template_names: Vec = self + .template_names + .iter() + .map(|g| ctx.rust_ident(g)) + .collect(); + + quote! { + pub trait #trait_name <#(#template_names:'static),*> : #trait_constraints { + #( #impl_items )* + } + } + } else { + quote! { + pub trait #trait_name : #trait_constraints { + #( #impl_items )* + } + } + }; + + let class_name = ctx.rust_ident(self.name()); + if !self.is_category() && !self.is_protocol() { + let struct_block = quote! 
{ + #[repr(transparent)] + #[derive(Debug, Copy, Clone)] + pub struct #class_name(pub id); + impl std::ops::Deref for #class_name { + type Target = objc::runtime::Object; + fn deref(&self) -> &Self::Target { + unsafe { + &*self.0 + } + } + } + unsafe impl objc::Message for #class_name { } + impl #class_name { + pub fn alloc() -> Self { + Self(unsafe { + msg_send!(class!(#class_name), alloc) + }) + } + } + }; + result.push(struct_block); + let mut protocol_set: HashSet = Default::default(); + for protocol_id in self.conforms_to.iter() { + protocol_set.insert(*protocol_id); + let protocol_name = ctx.rust_ident( + ctx.resolve_type(protocol_id.expect_type_id(ctx)) + .name() + .unwrap(), + ); + let impl_trait = quote! { + impl #protocol_name for #class_name { } + }; + result.push(impl_trait); + } + let mut parent_class = self.parent_class; + while let Some(parent_id) = parent_class { + let parent = parent_id + .expect_type_id(ctx) + .into_resolver() + .through_type_refs() + .resolve(ctx) + .expect_type() + .kind(); + + let parent = match parent { + TypeKind::ObjCInterface(ref parent) => parent, + _ => break, + }; + parent_class = parent.parent_class; + + let parent_name = ctx.rust_ident(parent.rust_name()); + let impl_trait = if parent.is_template() { + let template_names: Vec = parent + .template_names + .iter() + .map(|g| ctx.rust_ident(g)) + .collect(); + quote! { + impl <#(#template_names :'static),*> #parent_name <#(#template_names),*> for #class_name { + } + } + } else { + quote! { + impl #parent_name for #class_name { } + } + }; + result.push(impl_trait); + for protocol_id in parent.conforms_to.iter() { + if protocol_set.insert(*protocol_id) { + let protocol_name = ctx.rust_ident( + ctx.resolve_type(protocol_id.expect_type_id(ctx)) + .name() + .unwrap(), + ); + let impl_trait = quote! 
{ + impl #protocol_name for #class_name { } + }; + result.push(impl_trait); + } + } + if !parent.is_template() { + let parent_struct_name = parent.name(); + let child_struct_name = self.name(); + let parent_struct = ctx.rust_ident(parent_struct_name); + let from_block = quote! { + impl From<#class_name> for #parent_struct { + fn from(child: #class_name) -> #parent_struct { + #parent_struct(child.0) + } + } + }; + result.push(from_block); + + let error_msg = format!( + "This {} cannot be downcasted to {}", + parent_struct_name, child_struct_name + ); + let try_into_block = quote! { + impl std::convert::TryFrom<#parent_struct> for #class_name { + type Error = &'static str; + fn try_from(parent: #parent_struct) -> Result<#class_name, Self::Error> { + let is_kind_of : bool = unsafe { msg_send!(parent, isKindOfClass:class!(#class_name))}; + if is_kind_of { + Ok(#class_name(parent.0)) + } else { + Err(#error_msg) + } + } + } + }; + result.push(try_into_block); + } + } + } + + if !self.is_protocol() { + let impl_block = if self.is_template() { + let template_names: Vec = self + .template_names + .iter() + .map(|g| ctx.rust_ident(g)) + .collect(); + quote! { + impl <#(#template_names :'static),*> #trait_name <#(#template_names),*> for #class_name { + } + } + } else { + quote! 
{ + impl #trait_name for #class_name { + } + } + }; + result.push(impl_block); + } + + result.push(trait_block); + result.saw_objc(); + } +} + +pub(crate) fn codegen( + context: BindgenContext, +) -> Result<(proc_macro2::TokenStream, BindgenOptions), CodegenError> { + context.gen(|context| { + let _t = context.timer("codegen"); + let counter = Cell::new(0); + let mut result = CodegenResult::new(&counter); + + debug!("codegen: {:?}", context.options()); + + if context.options().emit_ir { + let codegen_items = context.codegen_items(); + for (id, item) in context.items() { + if codegen_items.contains(&id) { + println!("ir: {:?} = {:#?}", id, item); + } + } + } + + if let Some(path) = context.options().emit_ir_graphviz.as_ref() { + match dot::write_dot_file(context, path) { + Ok(()) => info!( + "Your dot file was generated successfully into: {}", + path + ), + Err(e) => warn!("{}", e), + } + } + + if let Some(spec) = context.options().depfile.as_ref() { + match spec.write(context.deps()) { + Ok(()) => info!( + "Your depfile was generated successfully into: {}", + spec.depfile_path.display() + ), + Err(e) => warn!("{}", e), + } + } + + context.resolve_item(context.root_module()).codegen( + context, + &mut result, + &(), + ); + + if let Some(ref lib_name) = context.options().dynamic_library_name { + let lib_ident = context.rust_ident(lib_name); + let dynamic_items_tokens = + result.dynamic_items().get_tokens(lib_ident, context); + result.push(dynamic_items_tokens); + } + + utils::serialize_items(&result, context)?; + + Ok(postprocessing::postprocessing( + result.items, + context.options(), + )) + }) +} + +pub(crate) mod utils { + use super::serialize::CSerialize; + use super::{error, CodegenError, CodegenResult, ToRustTyOrOpaque}; + use crate::ir::context::BindgenContext; + use crate::ir::function::{Abi, ClangAbi, FunctionSig}; + use crate::ir::item::{Item, ItemCanonicalPath}; + use crate::ir::ty::TypeKind; + use crate::{args_are_cpp, file_is_cpp}; + use 
std::borrow::Cow; + use std::io::Write; + use std::mem; + use std::path::PathBuf; + use std::str::FromStr; + + pub(super) fn serialize_items( + result: &CodegenResult, + context: &BindgenContext, + ) -> Result<(), CodegenError> { + if result.items_to_serialize.is_empty() { + return Ok(()); + } + + let path = context + .options() + .wrap_static_fns_path + .as_ref() + .map(PathBuf::from) + .unwrap_or_else(|| { + std::env::temp_dir().join("bindgen").join("extern") + }); + + let dir = path.parent().unwrap(); + + if !dir.exists() { + std::fs::create_dir_all(dir)?; + } + + let is_cpp = args_are_cpp(&context.options().clang_args) || + context + .options() + .input_headers + .iter() + .any(|h| file_is_cpp(h)); + + let source_path = path.with_extension(if is_cpp { "cpp" } else { "c" }); + + let mut code = Vec::new(); + + if !context.options().input_headers.is_empty() { + for header in &context.options().input_headers { + writeln!(code, "#include \"{}\"", header)?; + } + + writeln!(code)?; + } + + if !context.options().input_header_contents.is_empty() { + for (name, contents) in &context.options().input_header_contents { + writeln!(code, "// {}\n{}", name, contents)?; + } + + writeln!(code)?; + } + + writeln!(code, "// Static wrappers\n")?; + + for &id in &result.items_to_serialize { + let item = context.resolve_item(id); + item.serialize(context, (), &mut vec![], &mut code)?; + } + + std::fs::write(source_path, code)?; + + Ok(()) + } + + pub(crate) fn prepend_bitfield_unit_type( + ctx: &BindgenContext, + result: &mut Vec, + ) { + let bitfield_unit_src = include_str!("./bitfield_unit.rs"); + let bitfield_unit_src = if ctx.options().rust_features().min_const_fn { + Cow::Borrowed(bitfield_unit_src) + } else { + Cow::Owned(bitfield_unit_src.replace("const fn ", "fn ")) + }; + let bitfield_unit_type = + proc_macro2::TokenStream::from_str(&bitfield_unit_src).unwrap(); + let bitfield_unit_type = quote!(#bitfield_unit_type); + + let items = vec![bitfield_unit_type]; + let old_items 
= mem::replace(result, items); + result.extend(old_items); + } + + pub(crate) fn prepend_objc_header( + ctx: &BindgenContext, + result: &mut Vec, + ) { + let use_objc = if ctx.options().objc_extern_crate { + quote! { + #[macro_use] + extern crate objc; + } + } else { + quote! { + use objc::{self, msg_send, sel, sel_impl, class}; + } + }; + + let id_type = quote! { + #[allow(non_camel_case_types)] + pub type id = *mut objc::runtime::Object; + }; + + let items = vec![use_objc, id_type]; + let old_items = mem::replace(result, items); + result.extend(old_items.into_iter()); + } + + pub(crate) fn prepend_block_header( + ctx: &BindgenContext, + result: &mut Vec, + ) { + let use_block = if ctx.options().block_extern_crate { + quote! { + extern crate block; + } + } else { + quote! { + use block; + } + }; + + let items = vec![use_block]; + let old_items = mem::replace(result, items); + result.extend(old_items.into_iter()); + } + + pub(crate) fn prepend_union_types( + ctx: &BindgenContext, + result: &mut Vec, + ) { + let prefix = ctx.trait_prefix(); + + // If the target supports `const fn`, declare eligible functions + // as `const fn` else just `fn`. + let const_fn = if ctx.options().rust_features().min_const_fn { + quote! { const fn } + } else { + quote! { fn } + }; + + // TODO(emilio): The fmt::Debug impl could be way nicer with + // std::intrinsics::type_name, but... + let union_field_decl = quote! { + #[repr(C)] + pub struct __BindgenUnionField(::#prefix::marker::PhantomData); + }; + + let transmute = + ctx.wrap_unsafe_ops(quote!(::#prefix::mem::transmute(self))); + + let union_field_impl = quote! { + impl __BindgenUnionField { + #[inline] + pub #const_fn new() -> Self { + __BindgenUnionField(::#prefix::marker::PhantomData) + } + + #[inline] + pub unsafe fn as_ref(&self) -> &T { + #transmute + } + + #[inline] + pub unsafe fn as_mut(&mut self) -> &mut T { + #transmute + } + } + }; + + let union_field_default_impl = quote! 
{ + impl ::#prefix::default::Default for __BindgenUnionField { + #[inline] + fn default() -> Self { + Self::new() + } + } + }; + + let union_field_clone_impl = quote! { + impl ::#prefix::clone::Clone for __BindgenUnionField { + #[inline] + fn clone(&self) -> Self { + Self::new() + } + } + }; + + let union_field_copy_impl = quote! { + impl ::#prefix::marker::Copy for __BindgenUnionField {} + }; + + let union_field_debug_impl = quote! { + impl ::#prefix::fmt::Debug for __BindgenUnionField { + fn fmt(&self, fmt: &mut ::#prefix::fmt::Formatter<'_>) + -> ::#prefix::fmt::Result { + fmt.write_str("__BindgenUnionField") + } + } + }; + + // The actual memory of the filed will be hashed, so that's why these + // field doesn't do anything with the hash. + let union_field_hash_impl = quote! { + impl ::#prefix::hash::Hash for __BindgenUnionField { + fn hash(&self, _state: &mut H) { + } + } + }; + + let union_field_partialeq_impl = quote! { + impl ::#prefix::cmp::PartialEq for __BindgenUnionField { + fn eq(&self, _other: &__BindgenUnionField) -> bool { + true + } + } + }; + + let union_field_eq_impl = quote! { + impl ::#prefix::cmp::Eq for __BindgenUnionField { + } + }; + + let items = vec![ + union_field_decl, + union_field_impl, + union_field_default_impl, + union_field_clone_impl, + union_field_copy_impl, + union_field_debug_impl, + union_field_hash_impl, + union_field_partialeq_impl, + union_field_eq_impl, + ]; + + let old_items = mem::replace(result, items); + result.extend(old_items.into_iter()); + } + + pub(crate) fn prepend_incomplete_array_types( + ctx: &BindgenContext, + result: &mut Vec, + ) { + let prefix = ctx.trait_prefix(); + + // If the target supports `const fn`, declare eligible functions + // as `const fn` else just `fn`. + let const_fn = if ctx.options().rust_features().min_const_fn { + quote! { const fn } + } else { + quote! { fn } + }; + + let incomplete_array_decl = quote! 
{ + #[repr(C)] + #[derive(Default)] + pub struct __IncompleteArrayField( + ::#prefix::marker::PhantomData, [T; 0]); + }; + + let from_raw_parts = ctx.wrap_unsafe_ops(quote! ( + ::#prefix::slice::from_raw_parts(self.as_ptr(), len) + )); + let from_raw_parts_mut = ctx.wrap_unsafe_ops(quote! ( + ::#prefix::slice::from_raw_parts_mut(self.as_mut_ptr(), len) + )); + + let incomplete_array_impl = quote! { + impl __IncompleteArrayField { + #[inline] + pub #const_fn new() -> Self { + __IncompleteArrayField(::#prefix::marker::PhantomData, []) + } + + #[inline] + pub fn as_ptr(&self) -> *const T { + self as *const _ as *const T + } + + #[inline] + pub fn as_mut_ptr(&mut self) -> *mut T { + self as *mut _ as *mut T + } + + #[inline] + pub unsafe fn as_slice(&self, len: usize) -> &[T] { + #from_raw_parts + } + + #[inline] + pub unsafe fn as_mut_slice(&mut self, len: usize) -> &mut [T] { + #from_raw_parts_mut + } + } + }; + + let incomplete_array_debug_impl = quote! { + impl ::#prefix::fmt::Debug for __IncompleteArrayField { + fn fmt(&self, fmt: &mut ::#prefix::fmt::Formatter<'_>) + -> ::#prefix::fmt::Result { + fmt.write_str("__IncompleteArrayField") + } + } + }; + + let items = vec![ + incomplete_array_decl, + incomplete_array_impl, + incomplete_array_debug_impl, + ]; + + let old_items = mem::replace(result, items); + result.extend(old_items.into_iter()); + } + + pub(crate) fn prepend_complex_type( + result: &mut Vec, + ) { + let complex_type = quote! 
{ + #[derive(PartialEq, Copy, Clone, Hash, Debug, Default)] + #[repr(C)] + pub struct __BindgenComplex { + pub re: T, + pub im: T + } + }; + + let items = vec![complex_type]; + let old_items = mem::replace(result, items); + result.extend(old_items.into_iter()); + } + + pub(crate) fn build_path( + item: &Item, + ctx: &BindgenContext, + ) -> error::Result { + let path = item.namespace_aware_canonical_path(ctx); + let tokens = + proc_macro2::TokenStream::from_str(&path.join("::")).unwrap(); + + Ok(tokens) + } + + fn primitive_ty( + ctx: &BindgenContext, + name: &str, + ) -> proc_macro2::TokenStream { + let ident = ctx.rust_ident_raw(name); + quote! { + #ident + } + } + + pub(crate) fn type_from_named( + ctx: &BindgenContext, + name: &str, + ) -> Option { + // FIXME: We could use the inner item to check this is really a + // primitive type but, who the heck overrides these anyway? + Some(match name { + "int8_t" => primitive_ty(ctx, "i8"), + "uint8_t" => primitive_ty(ctx, "u8"), + "int16_t" => primitive_ty(ctx, "i16"), + "uint16_t" => primitive_ty(ctx, "u16"), + "int32_t" => primitive_ty(ctx, "i32"), + "uint32_t" => primitive_ty(ctx, "u32"), + "int64_t" => primitive_ty(ctx, "i64"), + "uint64_t" => primitive_ty(ctx, "u64"), + + "size_t" if ctx.options().size_t_is_usize => { + primitive_ty(ctx, "usize") + } + "uintptr_t" => primitive_ty(ctx, "usize"), + + "ssize_t" if ctx.options().size_t_is_usize => { + primitive_ty(ctx, "isize") + } + "intptr_t" | "ptrdiff_t" => primitive_ty(ctx, "isize"), + _ => return None, + }) + } + + fn fnsig_return_ty_internal( + ctx: &BindgenContext, + sig: &FunctionSig, + include_arrow: bool, + ) -> proc_macro2::TokenStream { + if sig.is_divergent() { + return if include_arrow { + quote! { -> ! } + } else { + quote! { ! 
} + }; + } + + let canonical_type_kind = sig + .return_type() + .into_resolver() + .through_type_refs() + .through_type_aliases() + .resolve(ctx) + .kind() + .expect_type() + .kind(); + + if let TypeKind::Void = canonical_type_kind { + return if include_arrow { + quote! {} + } else { + quote! { () } + }; + } + + let ret_ty = sig.return_type().to_rust_ty_or_opaque(ctx, &()); + if include_arrow { + quote! { -> #ret_ty } + } else { + ret_ty + } + } + + pub(crate) fn fnsig_return_ty( + ctx: &BindgenContext, + sig: &FunctionSig, + ) -> proc_macro2::TokenStream { + fnsig_return_ty_internal(ctx, sig, /* include_arrow = */ true) + } + + pub(crate) fn fnsig_arguments( + ctx: &BindgenContext, + sig: &FunctionSig, + ) -> Vec { + use super::ToPtr; + + let mut unnamed_arguments = 0; + let mut args = sig + .argument_types() + .iter() + .map(|&(ref name, ty)| { + let arg_item = ctx.resolve_item(ty); + let arg_ty = arg_item.kind().expect_type(); + + // From the C90 standard[1]: + // + // A declaration of a parameter as "array of type" shall be + // adjusted to "qualified pointer to type", where the type + // qualifiers (if any) are those specified within the [ and ] of + // the array type derivation. + // + // [1]: http://c0x.coding-guidelines.com/6.7.5.3.html + let arg_ty = match *arg_ty.canonical_type(ctx).kind() { + TypeKind::Array(t, _) => { + let stream = + if ctx.options().array_pointers_in_arguments { + arg_ty.to_rust_ty_or_opaque(ctx, arg_item) + } else { + t.to_rust_ty_or_opaque(ctx, &()) + }; + stream.to_ptr(ctx.resolve_type(t).is_const()) + } + TypeKind::Pointer(inner) => { + let inner = ctx.resolve_item(inner); + let inner_ty = inner.expect_type(); + if let TypeKind::ObjCInterface(ref interface) = + *inner_ty.canonical_type(ctx).kind() + { + let name = ctx.rust_ident(interface.name()); + quote! 
{ + #name + } + } else { + arg_item.to_rust_ty_or_opaque(ctx, &()) + } + } + _ => arg_item.to_rust_ty_or_opaque(ctx, &()), + }; + + let arg_name = match *name { + Some(ref name) => ctx.rust_mangle(name).into_owned(), + None => { + unnamed_arguments += 1; + format!("arg{}", unnamed_arguments) + } + }; + + assert!(!arg_name.is_empty()); + let arg_name = ctx.rust_ident(arg_name); + + quote! { + #arg_name : #arg_ty + } + }) + .collect::>(); + + if sig.is_variadic() { + args.push(quote! { ... }) + } + + args + } + + pub(crate) fn fnsig_argument_identifiers( + ctx: &BindgenContext, + sig: &FunctionSig, + ) -> Vec { + let mut unnamed_arguments = 0; + let args = sig + .argument_types() + .iter() + .map(|&(ref name, _ty)| { + let arg_name = match *name { + Some(ref name) => ctx.rust_mangle(name).into_owned(), + None => { + unnamed_arguments += 1; + format!("arg{}", unnamed_arguments) + } + }; + + assert!(!arg_name.is_empty()); + let arg_name = ctx.rust_ident(arg_name); + + quote! { + #arg_name + } + }) + .collect::>(); + + args + } + + pub(crate) fn fnsig_block( + ctx: &BindgenContext, + sig: &FunctionSig, + ) -> proc_macro2::TokenStream { + let args = sig.argument_types().iter().map(|&(_, ty)| { + let arg_item = ctx.resolve_item(ty); + + arg_item.to_rust_ty_or_opaque(ctx, &()) + }); + + let ret_ty = fnsig_return_ty_internal( + ctx, sig, /* include_arrow = */ false, + ); + quote! { + *const ::block::Block<(#(#args,)*), #ret_ty> + } + } + + // Returns true if `canonical_name` will end up as `mangled_name` at the + // machine code level, i.e. after LLVM has applied any target specific + // mangling. + pub(crate) fn names_will_be_identical_after_mangling( + canonical_name: &str, + mangled_name: &str, + call_conv: Option, + ) -> bool { + // If the mangled name and the canonical name are the same then no + // mangling can have happened between the two versions. 
+ if canonical_name == mangled_name { + return true; + } + + // Working with &[u8] makes indexing simpler than with &str + let canonical_name = canonical_name.as_bytes(); + let mangled_name = mangled_name.as_bytes(); + + let (mangling_prefix, expect_suffix) = match call_conv { + Some(ClangAbi::Known(Abi::C)) | + // None is the case for global variables + None => { + (b'_', false) + } + Some(ClangAbi::Known(Abi::Stdcall)) => (b'_', true), + Some(ClangAbi::Known(Abi::Fastcall)) => (b'@', true), + + // This is something we don't recognize, stay on the safe side + // by emitting the `#[link_name]` attribute + Some(_) => return false, + }; + + // Check that the mangled name is long enough to at least contain the + // canonical name plus the expected prefix. + if mangled_name.len() < canonical_name.len() + 1 { + return false; + } + + // Return if the mangled name does not start with the prefix expected + // for the given calling convention. + if mangled_name[0] != mangling_prefix { + return false; + } + + // Check that the mangled name contains the canonical name after the + // prefix + if &mangled_name[1..canonical_name.len() + 1] != canonical_name { + return false; + } + + // If the given calling convention also prescribes a suffix, check that + // it exists too + if expect_suffix { + let suffix = &mangled_name[canonical_name.len() + 1..]; + + // The shortest suffix is "@0" + if suffix.len() < 2 { + return false; + } + + // Check that the suffix starts with '@' and is all ASCII decimals + // after that. 
+ if suffix[0] != b'@' || !suffix[1..].iter().all(u8::is_ascii_digit) + { + return false; + } + } else if mangled_name.len() != canonical_name.len() + 1 { + // If we don't expect a prefix but there is one, we need the + // #[link_name] attribute + return false; + } + + true + } +} diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/postprocessing/merge_extern_blocks.rs clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/postprocessing/merge_extern_blocks.rs --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/postprocessing/merge_extern_blocks.rs 1970-01-01 00:00:00.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/postprocessing/merge_extern_blocks.rs 2023-08-15 22:24:19.000000000 +0000 @@ -0,0 +1,72 @@ +use syn::{ + visit_mut::{visit_file_mut, visit_item_mod_mut, VisitMut}, + File, Item, ItemForeignMod, ItemMod, +}; + +pub(super) fn merge_extern_blocks(file: &mut File) { + Visitor.visit_file_mut(file) +} + +struct Visitor; + +impl VisitMut for Visitor { + fn visit_file_mut(&mut self, file: &mut File) { + visit_items(&mut file.items); + visit_file_mut(self, file) + } + + fn visit_item_mod_mut(&mut self, item_mod: &mut ItemMod) { + if let Some((_, ref mut items)) = item_mod.content { + visit_items(items); + } + visit_item_mod_mut(self, item_mod) + } +} + +fn visit_items(items: &mut Vec) { + // Keep all the extern blocks in a different `Vec` for faster search. + let mut extern_blocks = Vec::::new(); + + for item in std::mem::take(items) { + if let Item::ForeignMod(ItemForeignMod { + attrs, + abi, + brace_token, + unsafety, + items: extern_block_items, + }) = item + { + let mut exists = false; + for extern_block in &mut extern_blocks { + // Check if there is a extern block with the same ABI and + // attributes. + if extern_block.attrs == attrs && extern_block.abi == abi { + // Merge the items of the two blocks. 
+ extern_block.items.extend_from_slice(&extern_block_items); + exists = true; + break; + } + } + // If no existing extern block had the same ABI and attributes, store + // it. + if !exists { + extern_blocks.push(ItemForeignMod { + attrs, + abi, + brace_token, + unsafety, + items: extern_block_items, + }); + } + } else { + // If the item is not an extern block, we don't have to do anything and just + // push it back. + items.push(item); + } + } + + // Move all the extern blocks alongside the rest of the items. + for extern_block in extern_blocks { + items.push(Item::ForeignMod(extern_block)); + } +} diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/postprocessing/mod.rs clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/postprocessing/mod.rs --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/postprocessing/mod.rs 1970-01-01 00:00:00.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/postprocessing/mod.rs 2023-08-15 22:24:19.000000000 +0000 @@ -0,0 +1,57 @@ +use proc_macro2::TokenStream; +use quote::ToTokens; +use syn::{parse2, File}; + +use crate::BindgenOptions; + +mod merge_extern_blocks; +mod sort_semantically; + +use merge_extern_blocks::merge_extern_blocks; +use sort_semantically::sort_semantically; + +struct PostProcessingPass { + should_run: fn(&BindgenOptions) -> bool, + run: fn(&mut File), +} + +// TODO: This can be a const fn when mutable references are allowed in const +// context. +macro_rules! 
pass { + ($pass:ident) => { + PostProcessingPass { + should_run: |options| options.$pass, + run: |file| $pass(file), + } + }; +} + +const PASSES: &[PostProcessingPass] = + &[pass!(merge_extern_blocks), pass!(sort_semantically)]; + +pub(crate) fn postprocessing( + items: Vec, + options: &BindgenOptions, +) -> TokenStream { + let items = items.into_iter().collect(); + let require_syn = PASSES.iter().any(|pass| (pass.should_run)(options)); + + if !require_syn { + return items; + } + + // This syn business is a hack, for now. This means that we are re-parsing already + // generated code using `syn` (as opposed to `quote`) because `syn` provides us more + // control over the elements. + // The `unwrap` here is deliberate because bindgen should generate valid rust items at all + // times. + let mut file = parse2::(items).unwrap(); + + for pass in PASSES { + if (pass.should_run)(options) { + (pass.run)(&mut file); + } + } + + file.into_token_stream() +} diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/postprocessing/sort_semantically.rs clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/postprocessing/sort_semantically.rs --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/postprocessing/sort_semantically.rs 1970-01-01 00:00:00.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/postprocessing/sort_semantically.rs 2023-08-15 22:24:19.000000000 +0000 @@ -0,0 +1,46 @@ +use syn::{ + visit_mut::{visit_file_mut, visit_item_mod_mut, VisitMut}, + File, Item, ItemMod, +}; + +pub(super) fn sort_semantically(file: &mut File) { + Visitor.visit_file_mut(file) +} + +struct Visitor; + +impl VisitMut for Visitor { + fn visit_file_mut(&mut self, file: &mut File) { + visit_items(&mut file.items); + visit_file_mut(self, file) + } + + fn visit_item_mod_mut(&mut self, item_mod: &mut ItemMod) { + if let Some((_, ref mut items)) = item_mod.content { + visit_items(items); + } + visit_item_mod_mut(self, item_mod) + 
} +} + +fn visit_items(items: &mut [Item]) { + items.sort_by_key(|item| match item { + Item::Type(_) => 0, + Item::Struct(_) => 1, + Item::Const(_) => 2, + Item::Fn(_) => 3, + Item::Enum(_) => 4, + Item::Union(_) => 5, + Item::Static(_) => 6, + Item::Trait(_) => 7, + Item::TraitAlias(_) => 8, + Item::Impl(_) => 9, + Item::Mod(_) => 10, + Item::Use(_) => 11, + Item::Verbatim(_) => 12, + Item::ExternCrate(_) => 13, + Item::ForeignMod(_) => 14, + Item::Macro(_) => 15, + _ => 18, + }); +} diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/serialize.rs clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/serialize.rs --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/serialize.rs 1970-01-01 00:00:00.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/serialize.rs 2023-08-15 22:24:19.000000000 +0000 @@ -0,0 +1,358 @@ +use std::io::Write; + +use crate::callbacks::IntKind; + +use crate::ir::comp::CompKind; +use crate::ir::context::{BindgenContext, TypeId}; +use crate::ir::function::{Function, FunctionKind}; +use crate::ir::item::Item; +use crate::ir::item::ItemCanonicalName; +use crate::ir::item_kind::ItemKind; +use crate::ir::ty::{FloatKind, Type, TypeKind}; + +use super::CodegenError; + +fn get_loc(item: &Item) -> String { + item.location() + .map(|x| x.to_string()) + .unwrap_or_else(|| "unknown".to_owned()) +} + +pub(crate) trait CSerialize<'a> { + type Extra; + + fn serialize( + &self, + ctx: &BindgenContext, + extra: Self::Extra, + stack: &mut Vec, + writer: &mut W, + ) -> Result<(), CodegenError>; +} + +impl<'a> CSerialize<'a> for Item { + type Extra = (); + + fn serialize( + &self, + ctx: &BindgenContext, + (): Self::Extra, + stack: &mut Vec, + writer: &mut W, + ) -> Result<(), CodegenError> { + match self.kind() { + ItemKind::Function(func) => { + func.serialize(ctx, self, stack, writer) + } + kind => Err(CodegenError::Serialize { + msg: format!("Cannot serialize item kind {:?}", kind), 
+ loc: get_loc(self), + }), + } + } +} + +impl<'a> CSerialize<'a> for Function { + type Extra = &'a Item; + + fn serialize( + &self, + ctx: &BindgenContext, + item: Self::Extra, + stack: &mut Vec, + writer: &mut W, + ) -> Result<(), CodegenError> { + if self.kind() != FunctionKind::Function { + return Err(CodegenError::Serialize { + msg: format!( + "Cannot serialize function kind {:?}", + self.kind(), + ), + loc: get_loc(item), + }); + } + + let signature = match ctx.resolve_type(self.signature()).kind() { + TypeKind::Function(signature) => signature, + _ => unreachable!(), + }; + + let name = self.name(); + + // Function argoments stored as `(name, type_id)` tuples. + let args = { + let mut count = 0; + + signature + .argument_types() + .iter() + .cloned() + .map(|(opt_name, type_id)| { + ( + opt_name.unwrap_or_else(|| { + let name = format!("arg_{}", count); + count += 1; + name + }), + type_id, + ) + }) + .collect::>() + }; + + // The name used for the wrapper self. + let wrap_name = format!("{}{}", name, ctx.wrap_static_fns_suffix()); + // The function's return type + let ret_ty = signature.return_type(); + + // Write `ret_ty wrap_name(args) { return name(arg_names)' }` + ret_ty.serialize(ctx, (), stack, writer)?; + write!(writer, " {}(", wrap_name)?; + serialize_args(&args, ctx, writer)?; + write!(writer, ") {{ return {}(", name)?; + serialize_sep(", ", args.iter(), ctx, writer, |(name, _), _, buf| { + write!(buf, "{}", name).map_err(From::from) + })?; + writeln!(writer, "); }}")?; + + Ok(()) + } +} + +impl<'a> CSerialize<'a> for TypeId { + type Extra = (); + + fn serialize( + &self, + ctx: &BindgenContext, + (): Self::Extra, + stack: &mut Vec, + writer: &mut W, + ) -> Result<(), CodegenError> { + let item = ctx.resolve_item(*self); + item.expect_type().serialize(ctx, item, stack, writer) + } +} + +impl<'a> CSerialize<'a> for Type { + type Extra = &'a Item; + + fn serialize( + &self, + ctx: &BindgenContext, + item: Self::Extra, + stack: &mut Vec, + writer: 
&mut W, + ) -> Result<(), CodegenError> { + match self.kind() { + TypeKind::Void => { + if self.is_const() { + write!(writer, "const ")?; + } + write!(writer, "void")? + } + TypeKind::NullPtr => { + if self.is_const() { + write!(writer, "const ")?; + } + write!(writer, "nullptr_t")? + } + TypeKind::Int(int_kind) => { + if self.is_const() { + write!(writer, "const ")?; + } + match int_kind { + IntKind::Bool => write!(writer, "bool")?, + IntKind::SChar => write!(writer, "signed char")?, + IntKind::UChar => write!(writer, "unsigned char")?, + IntKind::WChar => write!(writer, "wchar_t")?, + IntKind::Short => write!(writer, "short")?, + IntKind::UShort => write!(writer, "unsigned short")?, + IntKind::Int => write!(writer, "int")?, + IntKind::UInt => write!(writer, "unsigned int")?, + IntKind::Long => write!(writer, "long")?, + IntKind::ULong => write!(writer, "unsigned long")?, + IntKind::LongLong => write!(writer, "long long")?, + IntKind::ULongLong => write!(writer, "unsigned long long")?, + IntKind::Char { .. } => write!(writer, "char")?, + int_kind => { + return Err(CodegenError::Serialize { + msg: format!( + "Cannot serialize integer kind {:?}", + int_kind + ), + loc: get_loc(item), + }) + } + } + } + TypeKind::Float(float_kind) => { + if self.is_const() { + write!(writer, "const ")?; + } + match float_kind { + FloatKind::Float => write!(writer, "float")?, + FloatKind::Double => write!(writer, "double")?, + FloatKind::LongDouble => write!(writer, "long double")?, + FloatKind::Float128 => write!(writer, "__float128")?, + } + } + TypeKind::Complex(float_kind) => { + if self.is_const() { + write!(writer, "const ")?; + } + match float_kind { + FloatKind::Float => write!(writer, "float complex")?, + FloatKind::Double => write!(writer, "double complex")?, + FloatKind::LongDouble => { + write!(writer, "long double complex")? 
+ } + FloatKind::Float128 => write!(writer, "__complex128")?, + } + } + TypeKind::Alias(type_id) => { + if let Some(name) = self.name() { + if self.is_const() { + write!(writer, "const {}", name)?; + } else { + write!(writer, "{}", name)?; + } + } else { + type_id.serialize(ctx, (), stack, writer)?; + } + } + TypeKind::Array(type_id, length) => { + type_id.serialize(ctx, (), stack, writer)?; + write!(writer, " [{}]", length)? + } + TypeKind::Function(signature) => { + if self.is_const() { + stack.push("const ".to_string()); + } + + signature.return_type().serialize( + ctx, + (), + &mut vec![], + writer, + )?; + + write!(writer, " (")?; + while let Some(item) = stack.pop() { + write!(writer, "{}", item)?; + } + write!(writer, ")")?; + + write!(writer, " (")?; + serialize_sep( + ", ", + signature.argument_types().iter(), + ctx, + writer, + |(name, type_id), ctx, buf| { + let mut stack = vec![]; + if let Some(name) = name { + stack.push(name.clone()); + } + type_id.serialize(ctx, (), &mut stack, buf) + }, + )?; + write!(writer, ")")? + } + TypeKind::ResolvedTypeRef(type_id) => { + if self.is_const() { + write!(writer, "const ")?; + } + type_id.serialize(ctx, (), stack, writer)? + } + TypeKind::Pointer(type_id) => { + if self.is_const() { + stack.push("*const ".to_owned()); + } else { + stack.push("*".to_owned()); + } + type_id.serialize(ctx, (), stack, writer)? 
+ } + TypeKind::Comp(comp_info) => { + if self.is_const() { + write!(writer, "const ")?; + } + + let name = item.canonical_name(ctx); + + match comp_info.kind() { + CompKind::Struct => write!(writer, "struct {}", name)?, + CompKind::Union => write!(writer, "union {}", name)?, + }; + } + TypeKind::Enum(_enum_ty) => { + if self.is_const() { + write!(writer, "const ")?; + } + + let name = item.canonical_name(ctx); + write!(writer, "enum {}", name)?; + } + ty => { + return Err(CodegenError::Serialize { + msg: format!("Cannot serialize type kind {:?}", ty), + loc: get_loc(item), + }) + } + }; + + if !stack.is_empty() { + write!(writer, " ")?; + while let Some(item) = stack.pop() { + write!(writer, "{}", item)?; + } + } + + Ok(()) + } +} + +fn serialize_args( + args: &[(String, TypeId)], + ctx: &BindgenContext, + writer: &mut W, +) -> Result<(), CodegenError> { + if args.is_empty() { + write!(writer, "void")?; + } else { + serialize_sep( + ", ", + args.iter(), + ctx, + writer, + |(name, type_id), ctx, buf| { + type_id.serialize(ctx, (), &mut vec![name.clone()], buf) + }, + )?; + } + + Ok(()) +} + +fn serialize_sep< + W: Write, + F: FnMut(I::Item, &BindgenContext, &mut W) -> Result<(), CodegenError>, + I: Iterator, +>( + sep: &str, + mut iter: I, + ctx: &BindgenContext, + buf: &mut W, + mut f: F, +) -> Result<(), CodegenError> { + if let Some(item) = iter.next() { + f(item, ctx, buf)?; + let sep = sep.as_bytes(); + for item in iter { + buf.write_all(sep)?; + f(item, ctx, buf)?; + } + } + + Ok(()) +} diff -Nru clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/struct_layout.rs clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/struct_layout.rs --- clamav-1.0.1+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/struct_layout.rs 1970-01-01 00:00:00.000000000 +0000 +++ clamav-1.0.2+dfsg/libclamav_rust/.cargo/vendor/bindgen/codegen/struct_layout.rs 2023-08-15 22:24:19.000000000 +0000 @@ -0,0 +1,444 @@ +//! 
Helpers for code generation that need struct layout + +use super::helpers; + +use crate::ir::comp::CompInfo; +use crate::ir::context::BindgenContext; +use crate::ir::layout::Layout; +use crate::ir::ty::{Type, TypeKind}; +use proc_macro2::{self, Ident, Span}; +use std::cmp; + +const MAX_GUARANTEED_ALIGN: usize = 8; + +/// Trace the layout of struct. +#[derive(Debug)] +pub(crate) struct StructLayoutTracker<'a> { + name: &'a str, + ctx: &'a BindgenContext, + comp: &'a CompInfo, + is_packed: bool, + known_type_layout: Option, + is_rust_union: bool, + can_copy_union_fields: bool, + latest_offset: usize, + padding_count: usize, + latest_field_layout: Option, + max_field_align: usize, + last_field_was_bitfield: bool, +} + +/// Returns a size aligned to a given value. +pub(crate) fn align_to(size: usize, align: usize) -> usize { + if align == 0 { + return size; + } + + let rem = size % align; + if rem == 0 { + return size; + } + + size + align - rem +} + +/// Returns the lower power of two byte count that can hold at most n bits. 
+pub(crate) fn bytes_from_bits_pow2(mut n: usize) -> usize { + if n == 0 { + return 0; + } + + if n <= 8 { + return 1; + } + + if !n.is_power_of_two() { + n = n.next_power_of_two(); + } + + n / 8 +} + +#[test] +fn test_align_to() { + assert_eq!(align_to(1, 1), 1); + assert_eq!(align_to(1, 2), 2); + assert_eq!(align_to(1, 4), 4); + assert_eq!(align_to(5, 1), 5); + assert_eq!(align_to(17, 4), 20); +} + +#[test] +fn test_bytes_from_bits_pow2() { + assert_eq!(bytes_from_bits_pow2(0), 0); + for i in 1..9 { + assert_eq!(bytes_from_bits_pow2(i), 1); + } + for i in 9..17 { + assert_eq!(bytes_from_bits_pow2(i), 2); + } + for i in 17..33 { + assert_eq!(bytes_from_bits_pow2(i), 4); + } +} + +impl<'a> StructLayoutTracker<'a> { + pub(crate) fn new( + ctx: &'a BindgenContext, + comp: &'a CompInfo, + ty: &'a Type, + name: &'a str, + ) -> Self { + let known_type_layout = ty.layout(ctx); + let is_packed = comp.is_packed(ctx, known_type_layout.as_ref()); + let (is_rust_union, can_copy_union_fields) = + comp.is_rust_union(ctx, known_type_layout.as_ref(), name); + StructLayoutTracker { + name, + ctx, + comp, + is_packed, + known_type_layout, + is_rust_union, + can_copy_union_fields, + latest_offset: 0, + padding_count: 0, + latest_field_layout: None, + max_field_align: 0, + last_field_was_bitfield: false, + } + } + + pub(crate) fn can_copy_union_fields(&self) -> bool { + self.can_copy_union_fields + } + + pub(crate) fn is_rust_union(&self) -> bool { + self.is_rust_union + } + + pub(crate) fn saw_vtable(&mut self) { + debug!("saw vtable for {}", self.name); + + let ptr_size = self.ctx.target_pointer_size(); + self.latest_offset += ptr_size; + self.latest_field_layout = Some(Layout::new(ptr_size, ptr_size)); + self.max_field_align = ptr_size; + } + + pub(crate) fn saw_base(&mut self, base_ty: &Type) { + debug!("saw base for {}", self.name); + if let Some(layout) = base_ty.layout(self.ctx) { + self.align_to_latest_field(layout); + + self.latest_offset += self.padding_bytes(layout) + 
layout.size; + self.latest_field_layout = Some(layout); + self.max_field_align = cmp::max(self.max_field_align, layout.align); + } + } + + pub(crate) fn saw_bitfield_unit(&mut self, layout: Layout) { + debug!("saw bitfield unit for {}: {:?}", self.name, layout); + + self.align_to_latest_field(layout); + + self.latest_offset += layout.size; + + debug!( + "Offset: : {} -> {}", + self.latest_offset - layout.size, + self.latest_offset + ); + + self.latest_field_layout = Some(layout); + self.last_field_was_bitfield = true; + // NB: We intentionally don't update the max_field_align here, since our + // bitfields code doesn't necessarily guarantee it, so we need to + // actually generate the dummy alignment. + } + + /// Returns a padding field if necessary for a given new field _before_ + /// adding that field. + pub(crate) fn saw_field( + &mut self, + field_name: &str, + field_ty: &Type, + field_offset: Option, + ) -> Option { + let mut field_layout = field_ty.layout(self.ctx)?; + + if let TypeKind::Array(inner, len) = + *field_ty.canonical_type(self.ctx).kind() + { + // FIXME(emilio): As an _ultra_ hack, we correct the layout returned + // by arrays of structs that have a bigger alignment than what we + // can support. + // + // This means that the structs in the array are super-unsafe to + // access, since they won't be properly aligned, but there's not too + // much we can do about it. 
+ if let Some(layout) = self.ctx.resolve_type(inner).layout(self.ctx) + { + if layout.align > MAX_GUARANTEED_ALIGN { + field_layout.size = + align_to(layout.size, layout.align) * len; + field_layout.align = MAX_GUARANTEED_ALIGN; + } + } + } + self.saw_field_with_layout(field_name, field_layout, field_offset) + } + + pub(crate) fn saw_field_with_layout( + &mut self, + field_name: &str, + field_layout: Layout, + field_offset: Option, + ) -> Option { + let will_merge_with_bitfield = self.align_to_latest_field(field_layout); + + let is_union = self.comp.is_union(); + let padding_bytes = match field_offset { + Some(offset) if offset / 8 > self.latest_offset => { + offset / 8 - self.latest_offset + } + _ => { + if will_merge_with_bitfield || + field_layout.align == 0 || + is_union + { + 0 + } else if !self.is_packed { + self.padding_bytes(field_layout) + } else if let Some(l) = self.known_type_layout { + self.padding_bytes(l) + } else { + 0 + } + } + }; + + self.latest_offset += padding_bytes; + + let padding_layout = if self.is_packed || is_union { + None + } else { + let force_padding = self.ctx.options().force_explicit_padding; + + // Otherwise the padding is useless. 
+ let need_padding = force_padding || + padding_bytes >= field_layout.align || + field_layout.align > MAX_GUARANTEED_ALIGN; + + debug!( + "Offset: : {} -> {}", + self.latest_offset - padding_bytes, + self.latest_offset + ); + + debug!( + "align field {} to {}/{} with {} padding bytes {:?}", + field_name, + self.latest_offset, + field_offset.unwrap_or(0) / 8, + padding_bytes, + field_layout + ); + + let padding_align = if force_padding { + 1 + } else { + cmp::min(field_layout.align, MAX_GUARANTEED_ALIGN) + }; + + if need_padding && padding_bytes != 0 { + Some(Layout::new(padding_bytes, padding_align)) + } else { + None + } + }; + + self.latest_offset += field_layout.size; + self.latest_field_layout = Some(field_layout); + self.max_field_align = + cmp::max(self.max_field_align, field_layout.align); + self.last_field_was_bitfield = false; + + debug!( + "Offset: {}: {} -> {}", + field_name, + self.latest_offset - field_layout.size, + self.latest_offset + ); + + padding_layout.map(|layout| self.padding_field(layout)) + } + + pub(crate) fn add_tail_padding( + &mut self, + comp_name: &str, + comp_layout: Layout, + ) -> Option { + // Only emit an padding field at the end of a struct if the + // user configures explicit padding. + if !self.ctx.options().force_explicit_padding { + return None; + } + + // Padding doesn't make sense for rust unions. + if self.is_rust_union { + return None; + } + + if self.latest_offset == comp_layout.size { + // This struct does not contain tail padding. 
+ return None; + } + + trace!( + "need a tail padding field for {}: offset {} -> size {}", + comp_name, + self.latest_offset, + comp_layout.size + ); + let size = comp_layout.size - self.latest_offset; + Some(self.padding_field(Layout::new(size, 0))) + } + + pub(crate) fn pad_struct( + &mut self, + layout: Layout, + ) -> Option { + debug!( + "pad_struct:\n\tself = {:#?}\n\tlayout = {:#?}", + self, layout + ); + + if layout.size < self.latest_offset { + warn!( + "Calculated wrong layout for {}, too more {} bytes", + self.name, + self.latest_offset - layout.size + ); + return None; + } + + let padding_bytes = layout.size - self.latest_offset; + if padding_bytes == 0 { + return None; + } + + let repr_align = self.ctx.options().rust_features().repr_align; + + // We always pad to get to the correct size if the struct is one of + // those we can't align properly. + // + // Note that if the last field we saw was a bitfield, we may need to pad + // regardless, because bitfields don't respect alignment as strictly as + // other fields. + if padding_bytes >= layout.align || + (self.last_field_was_bitfield && + padding_bytes >= self.latest_field_layout.unwrap().align) || + (!repr_align && layout.align > MAX_GUARANTEED_ALIGN) + { + let layout = if self.is_packed { + Layout::new(padding_bytes, 1) + } else if self.last_field_was_bitfield || + layout.align > MAX_GUARANTEED_ALIGN + { + // We've already given up on alignment here. + Layout::for_size(self.ctx, padding_bytes) + } else { + Layout::new(padding_bytes, layout.align) + }; + + debug!("pad bytes to struct {}, {:?}", self.name, layout); + + Some(self.padding_field(layout)) + } else { + None + } + } + + pub(crate) fn requires_explicit_align(&self, layout: Layout) -> bool { + let repr_align = self.ctx.options().rust_features().repr_align; + + // Always force explicit repr(align) for stuff more than 16-byte aligned + // to work-around https://github.com/rust-lang/rust/issues/54341. 
+ // + // Worst-case this just generates redundant alignment attributes. + if repr_align && self.max_field_align >= 16 { + return true; + } + + if self.max_field_align >= layout.align { + return false; + } + + // We can only generate up-to a 8-bytes of alignment unless we support + // repr(align). + repr_align || layout.align <= MAX_GUARANTEED_ALIGN + } + + fn padding_bytes(&self, layout: Layout) -> usize { + align_to(self.latest_offset, layout.align) - self.latest_offset + } + + fn padding_field(&mut self, layout: Layout) -> proc_macro2::TokenStream { + let ty = helpers::blob(self.ctx, layout); + let padding_count = self.padding_count; + + self.padding_count += 1; + + let padding_field_name = Ident::new( + &format!("__bindgen_padding_{}", padding_count), + Span::call_site(),